root/altq/altq_rio.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rio_alloc
  2. rio_destroy
  3. rio_getstats
  4. dscp2index
  5. rio_addq
  6. rio_getq

    1 /*      $OpenBSD: altq_rio.c,v 1.10 2007/06/17 19:58:58 jasper Exp $    */
    2 /*      $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $       */
    3 
    4 /*
    5  * Copyright (C) 1998-2000
    6  *      Sony Computer Science Laboratories Inc.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 /*
   30  * Copyright (c) 1990-1994 Regents of the University of California.
   31  * All rights reserved.
   32  *
   33  * Redistribution and use in source and binary forms, with or without
   34  * modification, are permitted provided that the following conditions
   35  * are met:
   36  * 1. Redistributions of source code must retain the above copyright
   37  *    notice, this list of conditions and the following disclaimer.
   38  * 2. Redistributions in binary form must reproduce the above copyright
   39  *    notice, this list of conditions and the following disclaimer in the
   40  *    documentation and/or other materials provided with the distribution.
   41  * 3. All advertising materials mentioning features or use of this software
   42  *    must display the following acknowledgement:
   43  *      This product includes software developed by the Computer Systems
   44  *      Engineering Group at Lawrence Berkeley Laboratory.
   45  * 4. Neither the name of the University nor of the Laboratory may be used
   46  *    to endorse or promote products derived from this software without
   47  *    specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  */
   61 
   62 #include <sys/param.h>
   63 #include <sys/malloc.h>
   64 #include <sys/mbuf.h>
   65 #include <sys/socket.h>
   66 #include <sys/systm.h>
   67 #include <sys/errno.h>
   68 
   69 #include <net/if.h>
   70 #include <net/if_types.h>
   71 
   72 #include <netinet/in.h>
   73 #include <netinet/in_systm.h>
   74 #include <netinet/ip.h>
   75 #ifdef INET6
   76 #include <netinet/ip6.h>
   77 #endif
   78 
   79 #include <net/pfvar.h>
   80 #include <altq/altq.h>
   81 #include <altq/altq_cdnr.h>
   82 #include <altq/altq_red.h>
   83 #include <altq/altq_rio.h>
   84 
   85 /*
   86  * RIO: RED with IN/OUT bit
   87  *   described in
   88  *      "Explicit Allocation of Best Effort Packet Delivery Service"
   89  *      David D. Clark and Wenjia Fang, MIT Lab for Computer Science
   90  *      http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
   91  *
   92  * this implementation is extended to support more than 2 drop precedence
   93  * values as described in RFC2597 (Assured Forwarding PHB Group).
   94  *
   95  */
   96 /*
   97  * AF DS (differentiated service) codepoints.
   98  * (classes can be mapped to CBQ or H-FSC classes.)
   99  *
  100  *      0   1   2   3   4   5   6   7
  101  *    +---+---+---+---+---+---+---+---+
  102  *    |   CLASS   |DropPre| 0 |  CU   |
  103  *    +---+---+---+---+---+---+---+---+
  104  *
  105  *    class 1: 001
  106  *    class 2: 010
  107  *    class 3: 011
  108  *    class 4: 100
  109  *
  110  *    low drop prec:    01
  111  *    medium drop prec: 10
  112  *    high drop prec:   01
  113  */
  114 
  115 /* normal red parameters */
  116 #define W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
  117                                 /* q_weight = 0.00195 */
  118 
  119 /* red parameters for a slow link */
  120 #define W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
  121                                 /* q_weight = 0.0078125 */
  122 
  123 /* red parameters for a very slow link (e.g., dialup) */
  124 #define W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
  125                                 /* q_weight = 0.015625 */
  126 
  127 /* fixed-point uses 12-bit decimal places */
  128 #define FP_SHIFT        12      /* fixed-point shift */
  129 
  130 /* red parameters for drop probability */
  131 #define INV_P_MAX       10      /* inverse of max drop probability */
  132 #define TH_MIN           5      /* min threshold */
  133 #define TH_MAX          15      /* max threshold */
  134 
  135 #define RIO_LIMIT       60      /* default max queue length */
  136 #define RIO_STATS               /* collect statistics */
  137 
  138 #define TV_DELTA(a, b, delta) {                                 \
  139         int     xxs;                                    \
  140                                                                 \
  141         delta = (a)->tv_usec - (b)->tv_usec;                    \
  142         if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) {           \
  143                 if (xxs < 0) {                                  \
  144                         delta = 60000000;                       \
  145                 } else if (xxs > 4)  {                          \
  146                         if (xxs > 60)                           \
  147                                 delta = 60000000;               \
  148                         else                                    \
  149                                 delta += xxs * 1000000;         \
  150                 } else while (xxs > 0) {                        \
  151                         delta += 1000000;                       \
  152                         xxs--;                                  \
  153                 }                                               \
  154         }                                                       \
  155 }
  156 
  157 /* default rio parameter values */
  158 static struct redparams default_rio_params[RIO_NDROPPREC] = {
  159   /* th_min,             th_max,     inv_pmax */
  160   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
  161   { TH_MAX + TH_MIN,     TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
  162   { TH_MIN,              TH_MAX,     INV_P_MAX }  /* high drop precedence */
  163 };
  164 
  165 /* internal function prototypes */
  166 static int dscp2index(u_int8_t);
  167 
  168 rio_t *
  169 rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
  170 {
  171         rio_t   *rp;
  172         int      w, i;
  173         int      npkts_per_sec;
  174 
  175         MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
  176         if (rp == NULL)
  177                 return (NULL);
  178         bzero(rp, sizeof(rio_t));
  179 
  180         rp->rio_flags = flags;
  181         if (pkttime == 0)
  182                 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
  183                 rp->rio_pkttime = 800;
  184         else
  185                 rp->rio_pkttime = pkttime;
  186 
  187         if (weight != 0)
  188                 rp->rio_weight = weight;
  189         else {
  190                 /* use default */
  191                 rp->rio_weight = W_WEIGHT;
  192 
  193                 /* when the link is very slow, adjust red parameters */
  194                 npkts_per_sec = 1000000 / rp->rio_pkttime;
  195                 if (npkts_per_sec < 50) {
  196                         /* up to about 400Kbps */
  197                         rp->rio_weight = W_WEIGHT_2;
  198                 } else if (npkts_per_sec < 300) {
  199                         /* up to about 2.4Mbps */
  200                         rp->rio_weight = W_WEIGHT_1;
  201                 }
  202         }
  203 
  204         /* calculate wshift.  weight must be power of 2 */
  205         w = rp->rio_weight;
  206         for (i = 0; w > 1; i++)
  207                 w = w >> 1;
  208         rp->rio_wshift = i;
  209         w = 1 << rp->rio_wshift;
  210         if (w != rp->rio_weight) {
  211                 printf("invalid weight value %d for red! use %d\n",
  212                        rp->rio_weight, w);
  213                 rp->rio_weight = w;
  214         }
  215 
  216         /* allocate weight table */
  217         rp->rio_wtab = wtab_alloc(rp->rio_weight);
  218 
  219         for (i = 0; i < RIO_NDROPPREC; i++) {
  220                 struct dropprec_state *prec = &rp->rio_precstate[i];
  221 
  222                 prec->avg = 0;
  223                 prec->idle = 1;
  224 
  225                 if (params == NULL || params[i].inv_pmax == 0)
  226                         prec->inv_pmax = default_rio_params[i].inv_pmax;
  227                 else
  228                         prec->inv_pmax = params[i].inv_pmax;
  229                 if (params == NULL || params[i].th_min == 0)
  230                         prec->th_min = default_rio_params[i].th_min;
  231                 else
  232                         prec->th_min = params[i].th_min;
  233                 if (params == NULL || params[i].th_max == 0)
  234                         prec->th_max = default_rio_params[i].th_max;
  235                 else
  236                         prec->th_max = params[i].th_max;
  237 
  238                 /*
  239                  * th_min_s and th_max_s are scaled versions of th_min
  240                  * and th_max to be compared with avg.
  241                  */
  242                 prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
  243                 prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
  244 
  245                 /*
  246                  * precompute probability denominator
  247                  *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
  248                  */
  249                 prec->probd = (2 * (prec->th_max - prec->th_min)
  250                                * prec->inv_pmax) << FP_SHIFT;
  251 
  252                 microtime(&prec->last);
  253         }
  254 
  255         return (rp);
  256 }
  257 
  258 void
  259 rio_destroy(rio_t *rp)
  260 {
  261         wtab_destroy(rp->rio_wtab);
  262         FREE(rp, M_DEVBUF);
  263 }
  264 
  265 void
  266 rio_getstats(rio_t *rp, struct redstats *sp)
  267 {
  268         int     i;
  269 
  270         for (i = 0; i < RIO_NDROPPREC; i++) {
  271                 bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
  272                 sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
  273                 sp++;
  274         }
  275 }
  276 
  277 #if (RIO_NDROPPREC == 3)
  278 /*
  279  * internally, a drop precedence value is converted to an index
  280  * starting from 0.
  281  */
  282 static int
  283 dscp2index(u_int8_t dscp)
  284 {
  285         int     dpindex = dscp & AF_DROPPRECMASK;
  286 
  287         if (dpindex == 0)
  288                 return (0);
  289         return ((dpindex >> 3) - 1);
  290 }
  291 #endif
  292 
  293 #if 1
  294 /*
  295  * kludge: when a packet is dequeued, we need to know its drop precedence
  296  * in order to keep the queue length of each drop precedence.
  297  * use m_pkthdr.rcvif to pass this info.
  298  */
  299 #define RIOM_SET_PRECINDEX(m, idx)      \
  300         do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
  301 #define RIOM_GET_PRECINDEX(m)   \
  302         ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
  303         (m)->m_pkthdr.rcvif = NULL; idx; })
  304 #endif
  305 
  306 int
  307 rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
  308     struct altq_pktattr *pktattr)
  309 {
  310         int                      avg, droptype;
  311         u_int8_t                 dsfield, odsfield;
  312         int                      dpindex, i, n, t;
  313         struct timeval           now;
  314         struct dropprec_state   *prec;
  315 
  316         dsfield = odsfield = read_dsfield(m, pktattr);
  317         dpindex = dscp2index(dsfield);
  318 
  319         /*
  320          * update avg of the precedence states whose drop precedence
  321          * is larger than or equal to the drop precedence of the packet
  322          */
  323         now.tv_sec = 0;
  324         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  325                 prec = &rp->rio_precstate[i];
  326                 avg = prec->avg;
  327                 if (prec->idle) {
  328                         prec->idle = 0;
  329                         if (now.tv_sec == 0)
  330                                 microtime(&now);
  331                         t = (now.tv_sec - prec->last.tv_sec);
  332                         if (t > 60)
  333                                 avg = 0;
  334                         else {
  335                                 t = t * 1000000 +
  336                                         (now.tv_usec - prec->last.tv_usec);
  337                                 n = t / rp->rio_pkttime;
  338                                 /* calculate (avg = (1 - Wq)^n * avg) */
  339                                 if (n > 0)
  340                                         avg = (avg >> FP_SHIFT) *
  341                                                 pow_w(rp->rio_wtab, n);
  342                         }
  343                 }
  344 
  345                 /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
  346                 avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
  347                 prec->avg = avg;                /* save the new value */
  348                 /*
  349                  * count keeps a tally of arriving traffic that has not
  350                  * been dropped.
  351                  */
  352                 prec->count++;
  353         }
  354 
  355         prec = &rp->rio_precstate[dpindex];
  356         avg = prec->avg;
  357 
  358         /* see if we drop early */
  359         droptype = DTYPE_NODROP;
  360         if (avg >= prec->th_min_s && prec->qlen > 1) {
  361                 if (avg >= prec->th_max_s) {
  362                         /* avg >= th_max: forced drop */
  363                         droptype = DTYPE_FORCED;
  364                 } else if (prec->old == 0) {
  365                         /* first exceeds th_min */
  366                         prec->count = 1;
  367                         prec->old = 1;
  368                 } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
  369                                       prec->probd, prec->count)) {
  370                         /* unforced drop by red */
  371                         droptype = DTYPE_EARLY;
  372                 }
  373         } else {
  374                 /* avg < th_min */
  375                 prec->old = 0;
  376         }
  377 
  378         /*
  379          * if the queue length hits the hard limit, it's a forced drop.
  380          */
  381         if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
  382                 droptype = DTYPE_FORCED;
  383 
  384         if (droptype != DTYPE_NODROP) {
  385                 /* always drop incoming packet (as opposed to randomdrop) */
  386                 for (i = dpindex; i < RIO_NDROPPREC; i++)
  387                         rp->rio_precstate[i].count = 0;
  388 #ifdef RIO_STATS
  389                 if (droptype == DTYPE_EARLY)
  390                         rp->q_stats[dpindex].drop_unforced++;
  391                 else
  392                         rp->q_stats[dpindex].drop_forced++;
  393                 PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
  394 #endif
  395                 m_freem(m);
  396                 return (-1);
  397         }
  398 
  399         for (i = dpindex; i < RIO_NDROPPREC; i++)
  400                 rp->rio_precstate[i].qlen++;
  401 
  402         /* save drop precedence index in mbuf hdr */
  403         RIOM_SET_PRECINDEX(m, dpindex);
  404 
  405         if (rp->rio_flags & RIOF_CLEARDSCP)
  406                 dsfield &= ~DSCP_MASK;
  407 
  408         if (dsfield != odsfield)
  409                 write_dsfield(m, pktattr, dsfield);
  410 
  411         _addq(q, m);
  412 
  413 #ifdef RIO_STATS
  414         PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
  415 #endif
  416         return (0);
  417 }
  418 
  419 struct mbuf *
  420 rio_getq(rio_t *rp, class_queue_t *q)
  421 {
  422         struct mbuf     *m;
  423         int              dpindex, i;
  424 
  425         if ((m = _getq(q)) == NULL)
  426                 return NULL;
  427 
  428         dpindex = RIOM_GET_PRECINDEX(m);
  429         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  430                 if (--rp->rio_precstate[i].qlen == 0) {
  431                         if (rp->rio_precstate[i].idle == 0) {
  432                                 rp->rio_precstate[i].idle = 1;
  433                                 microtime(&rp->rio_precstate[i].last);
  434                         }
  435                 }
  436         }
  437         return (m);
  438 }

/* [<][>][^][v][top][bottom][index][help] */