root/altq/altq_rmclass.c

DEFINITIONS

This source file includes the following definitions.
  1. rmc_newclass
  2. rmc_modclass
  3. rmc_wrr_set_weights
  4. rmc_get_weight
  5. rmc_depth_compute
  6. rmc_depth_recompute
  7. rmc_delete_class
  8. rmc_init
  9. rmc_queue_packet
  10. rmc_tl_satisfied
  11. rmc_satisfied
  12. rmc_under_limit
  13. _rmc_wrr_dequeue_next
  14. _rmc_prr_dequeue_next
  15. rmc_dequeue_next
  16. rmc_update_class_util
  17. rmc_drop_action
  18. rmc_dropall
  19. rmc_delay_action
  20. rmc_restart
  21. rmc_root_overlimit
  22. _rmc_addq
  23. _rmc_dropq
  24. _rmc_getq
  25. _rmc_pollq
  26. rmc_funcname
  27. cbqtrace_dump
  28. _addq
  29. _getq
  30. _getq_tail
  31. _getq_random
  32. _removeq
  33. _flushq

    1 /*      $OpenBSD: altq_rmclass.c,v 1.12 2006/03/04 22:40:15 brad Exp $  */
    2 /*      $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $      */
    3 
    4 /*
    5  * Copyright (c) 1991-1997 Regents of the University of California.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the Network Research
   19  *      Group at Lawrence Berkeley Laboratory.
   20  * 4. Neither the name of the University nor of the Laboratory may be used
   21  *    to endorse or promote products derived from this software without
   22  *    specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  * LBL code modified by speer@eng.sun.com, May 1997.
   37  * For questions and/or comments, please send mail to cbq@ee.lbl.gov
   38  */
   39 
   40 #include <sys/param.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/socket.h>
   44 #include <sys/systm.h>
   45 #include <sys/errno.h>
   46 #include <sys/time.h>
   47 
   48 #include <net/if.h>
   49 
   50 #include <altq/altq.h>
   51 #include <altq/altq_rmclass.h>
   52 #include <altq/altq_rmclass_debug.h>
   53 #include <altq/altq_red.h>
   54 #include <altq/altq_rio.h>
   55 
   56 /*
   57  * Local Macros
   58  */
   59 
   60 #define reset_cutoff(ifd)       { ifd->cutoff_ = RM_MAXDEPTH; }
   61 
   62 /*
   63  * Local routines.
   64  */
   65 
   66 static int       rmc_satisfied(struct rm_class *, struct timeval *);
   67 static void      rmc_wrr_set_weights(struct rm_ifdat *);
   68 static void      rmc_depth_compute(struct rm_class *);
   69 static void      rmc_depth_recompute(rm_class_t *);
   70 
   71 static mbuf_t   *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
   72 static mbuf_t   *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
   73 
   74 static int       _rmc_addq(rm_class_t *, mbuf_t *);
   75 static void      _rmc_dropq(rm_class_t *);
   76 static mbuf_t   *_rmc_getq(rm_class_t *);
   77 static mbuf_t   *_rmc_pollq(rm_class_t *);
   78 
   79 static int       rmc_under_limit(struct rm_class *, struct timeval *);
   80 static void      rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
   81 static void      rmc_drop_action(struct rm_class *);
   82 static void      rmc_restart(struct rm_class *);
   83 static void      rmc_root_overlimit(struct rm_class *, struct rm_class *);
   84 
   85 #define BORROW_OFFTIME
   86 /*
   87  * BORROW_OFFTIME (experimental):
   88  * borrow the offtime of the class borrowing from.
   89  * the reason is that when its own offtime is set, the class is unable
   90  * to borrow much, especially when cutoff is taking effect.
   91  * but when the borrowed class is overloaded (avgidle is close to minidle),
   92  * use the borrowing class's offtime to avoid overload.
   93  */
   94 #define ADJUST_CUTOFF
   95 /*
   96  * ADJUST_CUTOFF (experimental):
   97  * if no underlimit class is found due to cutoff, increase cutoff and
   98  * retry the scheduling loop.
   99  * also, don't invoke delay_actions while cutoff is taking effect,
  100  * since a sleeping class won't have a chance to be scheduled in the
  101  * next loop.
  102  *
  103  * the heuristics for setting the top-level variable (cutoff_) become:
  104  *      1. if a packet arrives for a not-overlimit class, set cutoff
  105  *         to the depth of the class.
  106  *      2. if cutoff is i, and a packet arrives for an overlimit class
  107  *         with an underlimit ancestor at a lower level than i (say j),
  108  *         then set cutoff to j.
  109  *      3. at scheduling a packet, if there is no underlimit class
  110  *         due to the current cutoff level, increase cutoff by 1 and
  111  *         then try to schedule again.
  112  */
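
/*
 * Editor's illustration of the rules above (not in the original source):
 * consider a three-level tree with a root (depth 2), an agency class
 * (depth 1), and leaf classes (depth 0).  A packet arriving for an
 * underlimit leaf sets cutoff_ to 0 (rule 1), so no borrowing is allowed.
 * If a packet then arrives for an overlimit leaf whose agency parent is
 * underlimit, cutoff_ is raised to 1 (rule 2), letting that leaf borrow
 * from the agency.  If the scheduler still finds nothing to send,
 * cutoff_ is incremented toward the root (rule 3).
 */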
  113 
  114 /*
  115  * rm_class_t *
  116  * rmc_newclass(...) - Create a new resource management class at priority
  117  * 'pri' on the interface given by 'ifd'.
  118  *
  119  * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
  120  *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
  121  *              than 100% of the bandwidth, this number should be the
  122  *              'effective' rate for the class.  Let f be the
  123  *              bandwidth fraction allocated to this class, and let
  124  *              nsPerByte be the data rate of the output link in
  125  *              nanoseconds/byte.  Then nsecPerByte is set to
  126  *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
  127  *              for a class that gets 50% of an ethernet's bandwidth.
  128  *
  129  * action       the routine to call when the class is over limit.
  130  *
  131  * maxq         max allowable queue size for class (in packets).
  132  *
  133  * parent       parent class pointer.
  134  *
  135  * borrow       class to borrow from (should be either 'parent' or null).
  136  *
  137  * maxidle      max value allowed for class 'idle' time estimate (this
  138  *              parameter determines how large an initial burst of packets
  139  *              can be before overlimit action is invoked).
  140  *
  141  * offtime      how long 'delay' action will delay when class goes over
  142  *              limit (this parameter determines the steady-state burst
  143  *              size when a class is running over its limit).
  144  *
  145  * Maxidle and offtime have to be computed from the following:  If the
  146  * average packet size is s, the bandwidth fraction allocated to this
  147  * class is f, we want to allow b packet bursts, and the gain of the
  148  * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
  149  *
  150  *   ptime = s * nsPerByte * (1 - f) / f
  151  *   maxidle = ptime * (1 - g^b) / g^b
  152  *   minidle = -ptime * (1 / (f - 1))
  153  *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
  154  *
  155  * Operationally, it's convenient to specify maxidle & offtime in units
  156  * independent of the link bandwidth so the maxidle & offtime passed to
  157  * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
  158  * (The constant factor is a scale factor needed to make the parameters
  159  * integers.  This scaling also means that the 'unscaled' values of
  160  * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
  161  * not nanoseconds.)  Also note that the 'idle' filter computation keeps
  162  * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
  163  * maxidle also must be scaled upward by this value.  Thus, the passed
  164  * values for maxidle and offtime can be computed as follows:
  165  *
  166  * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
  167  * offtime = offtime * 8 / (1000 * nsecPerByte)
  168  *
  169  * When USE_HRTIME is employed, then maxidle and offtime become:
  170  *      maxidle = maxidle * (8.0 / nsecPerByte);
  171  *      offtime = offtime * (8.0 / nsecPerByte);
  172  */
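
/*
 * Editor's worked example (assumed values, not from the original
 * authors): for a 10Mb/s ethernet, nsPerByte = 800.  A class with
 * bandwidth fraction f = 0.5 uses nsecPerByte = 800 / 0.5 = 1600.
 * With average packet size s = 1000 bytes, RM_FILTER_GAIN = 5
 * (g = 1 - 2^-5 = 31/32) and burst size b = 2:
 *
 *   ptime   = 1000 * 800 * (1 - 0.5) / 0.5 = 800000 ns
 *   maxidle = 800000 * (1 - (31/32)^2) / (31/32)^2 ~= 52445 ns
 *
 * which is then scaled by 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * as described above before being passed to rmc_newclass().
 */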
  173 
  174 struct rm_class *
  175 rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
  176     void (*action)(rm_class_t *, rm_class_t *), int maxq,
  177     struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
  178     int minidle, u_int offtime, int pktsize, int flags)
  179 {
  180         struct rm_class *cl;
  181         struct rm_class *peer;
  182         int              s;
  183 
  184         if (pri >= RM_MAXPRIO)
  185                 return (NULL);
  186 #ifndef ALTQ_RED
  187         if (flags & RMCF_RED) {
  188 #ifdef ALTQ_DEBUG
  189                 printf("rmc_newclass: RED not configured for CBQ!\n");
  190 #endif
  191                 return (NULL);
  192         }
  193 #endif
  194 #ifndef ALTQ_RIO
  195         if (flags & RMCF_RIO) {
  196 #ifdef ALTQ_DEBUG
  197                 printf("rmc_newclass: RIO not configured for CBQ!\n");
  198 #endif
  199                 return (NULL);
  200         }
  201 #endif
  202 
  203         MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
  204                M_DEVBUF, M_WAITOK);
  205         if (cl == NULL)
  206                 return (NULL);
  207         bzero(cl, sizeof(struct rm_class));
  208         CALLOUT_INIT(&cl->callout_);
  209         MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
  210                M_DEVBUF, M_WAITOK);
  211         if (cl->q_ == NULL) {
  212                 FREE(cl, M_DEVBUF);
  213                 return (NULL);
  214         }
  215         bzero(cl->q_, sizeof(class_queue_t));
  216 
  217         /*
  218          * Class initialization.
  219          */
  220         cl->children_ = NULL;
  221         cl->parent_ = parent;
  222         cl->borrow_ = borrow;
  223         cl->leaf_ = 1;
  224         cl->ifdat_ = ifd;
  225         cl->pri_ = pri;
  226         cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
  227         cl->depth_ = 0;
  228         cl->qthresh_ = 0;
  229         cl->ns_per_byte_ = nsecPerByte;
  230 
  231         qlimit(cl->q_) = maxq;
  232         qtype(cl->q_) = Q_DROPHEAD;
  233         qlen(cl->q_) = 0;
  234         cl->flags_ = flags;
  235 
  236 #if 1 /* minidle is also scaled in ALTQ */
  237         cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
  238         if (cl->minidle_ > 0)
  239                 cl->minidle_ = 0;
  240 #else
  241         cl->minidle_ = minidle;
  242 #endif
  243         cl->maxidle_ = (maxidle * nsecPerByte) / 8;
  244         if (cl->maxidle_ == 0)
  245                 cl->maxidle_ = 1;
  246 #if 1 /* offtime is also scaled in ALTQ */
  247         cl->avgidle_ = cl->maxidle_;
  248         cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
  249         if (cl->offtime_ == 0)
  250                 cl->offtime_ = 1;
  251 #else
  252         cl->avgidle_ = 0;
  253         cl->offtime_ = (offtime * nsecPerByte) / 8;
  254 #endif
  255         cl->overlimit = action;
  256 
  257 #ifdef ALTQ_RED
  258         if (flags & (RMCF_RED|RMCF_RIO)) {
  259                 int red_flags, red_pkttime;
  260 
  261                 red_flags = 0;
  262                 if (flags & RMCF_ECN)
  263                         red_flags |= REDF_ECN;
  264                 if (flags & RMCF_FLOWVALVE)
  265                         red_flags |= REDF_FLOWVALVE;
  266 #ifdef ALTQ_RIO
  267                 if (flags & RMCF_CLEARDSCP)
  268                         red_flags |= RIOF_CLEARDSCP;
  269 #endif
  270                 red_pkttime = nsecPerByte * pktsize  / 1000;
  271 
  272                 if (flags & RMCF_RED) {
  273                         cl->red_ = red_alloc(0, 0,
  274                             qlimit(cl->q_) * 10/100,
  275                             qlimit(cl->q_) * 30/100,
  276                             red_flags, red_pkttime);
  277                         if (cl->red_ != NULL)
  278                                 qtype(cl->q_) = Q_RED;
  279                 }
  280 #ifdef ALTQ_RIO
  281                 else {
  282                         cl->red_ = (red_t *)rio_alloc(0, NULL,
  283                                                       red_flags, red_pkttime);
  284                         if (cl->red_ != NULL)
  285                                 qtype(cl->q_) = Q_RIO;
  286                 }
  287 #endif
  288         }
  289 #endif /* ALTQ_RED */
  290 
  291         /*
  292          * put the class into the class tree
  293          */
  294         s = splnet();
  295         if ((peer = ifd->active_[pri]) != NULL) {
  296                 /* find the last class at this pri */
  297                 cl->peer_ = peer;
  298                 while (peer->peer_ != ifd->active_[pri])
  299                         peer = peer->peer_;
  300                 peer->peer_ = cl;
  301         } else {
  302                 ifd->active_[pri] = cl;
  303                 cl->peer_ = cl;
  304         }
  305 
  306         if (cl->parent_) {
  307                 cl->next_ = parent->children_;
  308                 parent->children_ = cl;
  309                 parent->leaf_ = 0;
  310         }
  311 
  312         /*
  313          * Compute the depth of this class and its ancestors in the class
  314          * hierarchy.
  315          */
  316         rmc_depth_compute(cl);
  317 
  318         /*
  319          * If CBQ's WRR is enabled, then initialize the class WRR state.
  320          */
  321         if (ifd->wrr_) {
  322                 ifd->num_[pri]++;
  323                 ifd->alloc_[pri] += cl->allotment_;
  324                 rmc_wrr_set_weights(ifd);
  325         }
  326         splx(s);
  327         return (cl);
  328 }
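
#if 0
/*
 * Editor's usage sketch (hypothetical, not part of the original file):
 * create a leaf class that gets 50% of a 10Mb/s link and may borrow
 * from the root.  maxidle, minidle and offtime are assumed to be
 * pre-scaled as described in the big comment above.
 */
	struct rm_class *cl;

	cl = rmc_newclass(1, ifd, 1600 /* ns/byte for 50% of 10Mb/s */,
	    rmc_delay_action,		/* generic 'delay' overlimit action */
	    64,				/* maxq: limit queue to 64 packets */
	    ifd->root_, ifd->root_,	/* parent and borrow class */
	    maxidle, minidle, offtime,
	    1000,			/* average packet size in bytes */
	    0 /* flags */);
	if (cl == NULL)
		return (ENOMEM);
#endif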
  329 
  330 int
  331 rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
  332     int minidle, u_int offtime, int pktsize)
  333 {
  334         struct rm_ifdat *ifd;
  335         u_int            old_allotment;
  336         int              s;
  337 
  338         ifd = cl->ifdat_;
  339         old_allotment = cl->allotment_;
  340 
  341         s = splnet();
  342         cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
  343         cl->qthresh_ = 0;
  344         cl->ns_per_byte_ = nsecPerByte;
  345 
  346         qlimit(cl->q_) = maxq;
  347 
  348 #if 1 /* minidle is also scaled in ALTQ */
  349         cl->minidle_ = (minidle * nsecPerByte) / 8;
  350         if (cl->minidle_ > 0)
  351                 cl->minidle_ = 0;
  352 #else
  353         cl->minidle_ = minidle;
  354 #endif
  355         cl->maxidle_ = (maxidle * nsecPerByte) / 8;
  356         if (cl->maxidle_ == 0)
  357                 cl->maxidle_ = 1;
  358 #if 1 /* offtime is also scaled in ALTQ */
  359         cl->avgidle_ = cl->maxidle_;
  360         cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
  361         if (cl->offtime_ == 0)
  362                 cl->offtime_ = 1;
  363 #else
  364         cl->avgidle_ = 0;
  365         cl->offtime_ = (offtime * nsecPerByte) / 8;
  366 #endif
  367 
  368         /*
  369          * If CBQ's WRR is enabled, then initialize the class WRR state.
  370          */
  371         if (ifd->wrr_) {
  372                 ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
  373                 rmc_wrr_set_weights(ifd);
  374         }
  375         splx(s);
  376         return (0);
  377 }
  378 
  379 /*
  380  * static void
  381  * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
  382  *      the appropriate round-robin weights for the CBQ weighted round robin
  383  *      algorithm.
  384  *
  385  *      Returns: NONE
  386  */
  387 
  388 static void
  389 rmc_wrr_set_weights(struct rm_ifdat *ifd)
  390 {
  391         int             i;
  392         struct rm_class *cl, *clh;
  393 
  394         for (i = 0; i < RM_MAXPRIO; i++) {
  395                 /*
  396                  * This is inverted from that of the simulator to
  397                  * maintain precision.
  398                  */
  399                 if (ifd->num_[i] == 0)
  400                         ifd->M_[i] = 0;
  401                 else
  402                         ifd->M_[i] = ifd->alloc_[i] /
  403                                 (ifd->num_[i] * ifd->maxpkt_);
  404                 /*
  405                  * Compute the weighted allotment for each class.
  406                  * This takes the expensive div instruction out
  407                  * of the main loop for the wrr scheduling path.
  408                  * These only get recomputed when a class comes or
  409                  * goes.
  410                  */
  411                 if (ifd->active_[i] != NULL) {
  412                         clh = cl = ifd->active_[i];
  413                         do {
  414                                 /* safe-guard for slow link or alloc_ == 0 */
  415                                 if (ifd->M_[i] == 0)
  416                                         cl->w_allotment_ = 0;
  417                                 else
  418                                         cl->w_allotment_ = cl->allotment_ /
  419                                                 ifd->M_[i];
  420                                 cl = cl->peer_;
  421                         } while ((cl != NULL) && (cl != clh));
  422                 }
  423         }
  424 }
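
/*
 * Editor's worked example of the weight computation above (assumed
 * numbers): on a 10Mb/s link (1250000 bytes/sec) with maxpkt_ = 1500,
 * suppose priority i holds two classes with allotments 625000 (50%)
 * and 312500 (25%) bytes/sec.  Then
 *
 *   M_[i]        = (625000 + 312500) / (2 * 1500) = 312
 *   w_allotment_ = 625000 / 312 = 2003 bytes   (the 50% class)
 *   w_allotment_ = 312500 / 312 = 1001 bytes   (the 25% class)
 *
 * so per WRR round the 50% class may send roughly twice the bytes of
 * the 25% class.
 */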
  425 
  426 int
  427 rmc_get_weight(struct rm_ifdat *ifd, int pri)
  428 {
  429         if ((pri >= 0) && (pri < RM_MAXPRIO))
  430                 return (ifd->M_[pri]);
  431         else
  432                 return (0);
  433 }
  434 
  435 /*
  436  * static void
  437  * rmc_depth_compute(struct rm_class *cl) - This function computes the
  438  *      appropriate depth of class 'cl' and its ancestors.
  439  *
  440  *      Returns:        NONE
  441  */
  442 
  443 static void
  444 rmc_depth_compute(struct rm_class *cl)
  445 {
  446         rm_class_t      *t = cl, *p;
  447 
  448         /*
  449          * Recompute the depth for the branch of the tree.
  450          */
  451         while (t != NULL) {
  452                 p = t->parent_;
  453                 if (p && (t->depth_ >= p->depth_)) {
  454                         p->depth_ = t->depth_ + 1;
  455                         t = p;
  456                 } else
  457                         t = NULL;
  458         }
  459 }
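
/*
 * Editor's illustration (not in the original): a new class starts at
 * depth 0.  Linking it under a parent that was itself a leaf raises
 * the parent to depth 1, the grandparent to depth 2, and so on; the
 * loop above stops as soon as an ancestor is already deeper than the
 * child, since depths further up cannot change.
 */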
  460 
  461 /*
  462  * static void
  463  * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
  464  *      the depth of the tree after a class has been deleted.
  465  *
  466  *      Returns:        NONE
  467  */
  468 
  469 static void
  470 rmc_depth_recompute(rm_class_t *cl)
  471 {
  472 #if 1 /* ALTQ */
  473         rm_class_t      *p, *t;
  474 
  475         p = cl;
  476         while (p != NULL) {
  477                 if ((t = p->children_) == NULL) {
  478                         p->depth_ = 0;
  479                 } else {
  480                         int cdepth = 0;
  481 
  482                         while (t != NULL) {
  483                                 if (t->depth_ > cdepth)
  484                                         cdepth = t->depth_;
  485                                 t = t->next_;
  486                         }
  487 
  488                         if (p->depth_ == cdepth + 1)
  489                                 /* no change to this parent */
  490                                 return;
  491 
  492                         p->depth_ = cdepth + 1;
  493                 }
  494 
  495                 p = p->parent_;
  496         }
  497 #else
  498         rm_class_t      *t;
  499 
  500         if (cl->depth_ >= 1) {
  501                 if (cl->children_ == NULL) {
  502                         cl->depth_ = 0;
  503                 } else if ((t = cl->children_) != NULL) {
  504                         while (t != NULL) {
  505                                 if (t->children_ != NULL)
  506                                         rmc_depth_recompute(t);
  507                                 t = t->next_;
  508                         }
  509                 } else
  510                         rmc_depth_compute(cl);
  511         }
  512 #endif
  513 }
  514 
  515 /*
  516  * void
  517  * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
  518  *      function deletes a class from the link-sharing structure and frees
  519  *      all resources associated with the class.
  520  *
  521  *      Returns: NONE
  522  */
  523 
  524 void
  525 rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
  526 {
  527         struct rm_class *p, *head, *previous;
  528         int              s;
  529 
  530         ASSERT(cl->children_ == NULL);
  531 
  532         if (cl->sleeping_)
  533                 CALLOUT_STOP(&cl->callout_);
  534 
  535         s = splnet();
  536         /*
  537          * Free packets in the packet queue.
  538          * XXX - this may not be a desired behavior.  Packets should be
  539          *              re-queued.
  540          */
  541         rmc_dropall(cl);
  542 
  543         /*
  544          * If the class has a parent, then remove the class from its
  545          * parent's children chain.
  546          */
  547         if (cl->parent_ != NULL) {
  548                 head = cl->parent_->children_;
  549                 p = previous = head;
  550                 if (head->next_ == NULL) {
  551                         ASSERT(head == cl);
  552                         cl->parent_->children_ = NULL;
  553                         cl->parent_->leaf_ = 1;
  554                 } else while (p != NULL) {
  555                         if (p == cl) {
  556                                 if (cl == head)
  557                                         cl->parent_->children_ = cl->next_;
  558                                 else
  559                                         previous->next_ = cl->next_;
  560                                 cl->next_ = NULL;
  561                                 p = NULL;
  562                         } else {
  563                                 previous = p;
  564                                 p = p->next_;
  565                         }
  566                 }
  567         }
  568 
  569         /*
  570          * Delete class from class priority peer list.
  571          */
  572         if ((p = ifd->active_[cl->pri_]) != NULL) {
  573                 /*
  574                  * If there is more than one member of this priority
  575                  * level, then look for class(cl) in the priority level.
  576                  */
  577                 if (p != p->peer_) {
  578                         while (p->peer_ != cl)
  579                                 p = p->peer_;
  580                         p->peer_ = cl->peer_;
  581 
  582                         if (ifd->active_[cl->pri_] == cl)
  583                                 ifd->active_[cl->pri_] = cl->peer_;
  584                 } else {
  585                         ASSERT(p == cl);
  586                         ifd->active_[cl->pri_] = NULL;
  587                 }
  588         }
  589 
  590         /*
  591          * Recompute the WRR weights.
  592          */
  593         if (ifd->wrr_) {
  594                 ifd->alloc_[cl->pri_] -= cl->allotment_;
  595                 ifd->num_[cl->pri_]--;
  596                 rmc_wrr_set_weights(ifd);
  597         }
  598 
  599         /*
  600          * Re-compute the depth of the tree.
  601          */
  602 #if 1 /* ALTQ */
  603         rmc_depth_recompute(cl->parent_);
  604 #else
  605         rmc_depth_recompute(ifd->root_);
  606 #endif
  607 
  608         splx(s);
  609 
  610         /*
  611          * Free the class structure.
  612          */
  613         if (cl->red_ != NULL) {
  614 #ifdef ALTQ_RIO
  615                 if (q_is_rio(cl->q_))
  616                         rio_destroy((rio_t *)cl->red_);
  617 #endif
  618 #ifdef ALTQ_RED
  619                 if (q_is_red(cl->q_))
  620                         red_destroy(cl->red_);
  621 #endif
  622         }
  623         FREE(cl->q_, M_DEVBUF);
  624         FREE(cl, M_DEVBUF);
  625 }
  626 
  627 
  628 /*
  629  * void
  630  * rmc_init(...) - Initialize the resource management data structures
  631  *      associated with the output portion of interface 'ifp'.  'ifd' is
  632  *      where the structures will be built (for backwards compatibility, the
  633  *      structures aren't kept in the ifnet struct).  'nsecPerByte'
  634  *      gives the link speed (inverse of bandwidth) in nanoseconds/byte.
  635  *      'restart' is the driver-specific routine that the generic 'delay
  636  *      until under limit' action will call to restart output.  `maxq'
  637  *      is the queue size of the 'link' & 'default' classes.  'maxqueued'
  638  *      is the maximum number of packets that the resource management
  639  *      code will allow to be queued 'downstream' (this is typically 1).
  640  *
  641  *      Returns:        NONE
  642  */
  643 
  644 void
  645 rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
  646     void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
  647     int minidle, u_int offtime, int flags)
  648 {
  649         int             i, mtu;
  650 
  651         /*
  652          * Initialize the CBQ tracing/debug facility.
  653          */
  654         CBQTRACEINIT();
  655 
  656         bzero((char *)ifd, sizeof (*ifd));
  657         mtu = ifq->altq_ifp->if_mtu;
  658         ifd->ifq_ = ifq;
  659         ifd->restart = restart;
  660         ifd->maxqueued_ = maxqueued;
  661         ifd->ns_per_byte_ = nsecPerByte;
  662         ifd->maxpkt_ = mtu;
  663         ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
  664         ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
  665 #if 1
  666         ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
  667         if (mtu * nsecPerByte > 10 * 1000000)
  668                 ifd->maxiftime_ /= 4;
  669 #endif
  670 
  671         reset_cutoff(ifd);
  672         CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
  673 
  674         /*
  675          * Initialize the CBQ's WRR state.
  676          */
  677         for (i = 0; i < RM_MAXPRIO; i++) {
  678                 ifd->alloc_[i] = 0;
  679                 ifd->M_[i] = 0;
  680                 ifd->num_[i] = 0;
  681                 ifd->na_[i] = 0;
  682                 ifd->active_[i] = NULL;
  683         }
  684 
  685         /*
  686          * Initialize current packet state.
  687          */
  688         ifd->qi_ = 0;
  689         ifd->qo_ = 0;
  690         for (i = 0; i < RM_MAXQUEUED; i++) {
  691                 ifd->class_[i] = NULL;
  692                 ifd->curlen_[i] = 0;
  693                 ifd->borrowed_[i] = NULL;
  694         }
  695 
  696         /*
  697          * Create the root class of the link-sharing structure.
  698          */
  699         if ((ifd->root_ = rmc_newclass(0, ifd,
  700                                        nsecPerByte,
  701                                        rmc_root_overlimit, maxq, 0, 0,
  702                                        maxidle, minidle, offtime,
  703                                        0, 0)) == NULL) {
  704                 printf("rmc_init: root class not allocated\n");
  705                 return;
  706         }
  707         ifd->root_->depth_ = 0;
  708 }
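
#if 0
/*
 * Editor's sketch of an attach-time call (restart_fn and cbqp are
 * hypothetical names): set up link-sharing state for a 10Mb/s
 * interface with WRR enabled and one packet queued downstream.
 */
	rmc_init(ifq, &cbqp->ifnp, 800 /* ns/byte */, restart_fn,
	    50 /* maxq */, 1 /* maxqueued */,
	    maxidle, minidle, offtime, RMCF_WRR);
#endif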
  709 
  710 /*
  711  * void
  712  * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
  713  *      mbuf 'm' to queue for resource class 'cl'.  This routine is called
  714  *      by a driver's if_output routine.  This routine must be called with
  715  *      output packet completion interrupts locked out (to avoid racing with
  716  *      rmc_dequeue_next).
  717  *
  718  *      Returns:        0 on successful queueing
  719  *                      -1 when packet drop occurs
  720  */
  721 int
  722 rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
  723 {
  724         struct timeval   now;
  725         struct rm_ifdat *ifd = cl->ifdat_;
  726         int              cpri = cl->pri_;
  727         int              is_empty = qempty(cl->q_);
  728 
  729         RM_GETTIME(now);
  730         if (ifd->cutoff_ > 0) {
  731                 if (TV_LT(&cl->undertime_, &now)) {
  732                         if (ifd->cutoff_ > cl->depth_)
  733                                 ifd->cutoff_ = cl->depth_;
  734                         CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
  735                 }
  736 #if 1 /* ALTQ */
  737                 else {
  738                         /*
  739                          * the class is overlimit. if the class has
  740                          * underlimit ancestors, set cutoff to the lowest
  741                          * depth among them.
  742                          */
  743                         struct rm_class *borrow = cl->borrow_;
  744 
  745                         while (borrow != NULL &&
  746                                borrow->depth_ < ifd->cutoff_) {
  747                                 if (TV_LT(&borrow->undertime_, &now)) {
  748                                         ifd->cutoff_ = borrow->depth_;
  749                                         CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
  750                                         break;
  751                                 }
  752                                 borrow = borrow->borrow_;
  753                         }
  754                 }
  755 #else /* !ALTQ */
  756                 else if ((ifd->cutoff_ > 1) && cl->borrow_) {
  757                         if (TV_LT(&cl->borrow_->undertime_, &now)) {
  758                                 ifd->cutoff_ = cl->borrow_->depth_;
  759                                 CBQTRACE(rmc_queue_packet, 'ffob',
  760                                          cl->borrow_->depth_);
  761                         }
  762                 }
  763 #endif /* !ALTQ */
  764         }
  765 
  766         if (_rmc_addq(cl, m) < 0)
  767                 /* failed */
  768                 return (-1);
  769 
  770         if (is_empty) {
  771                 CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
  772                 ifd->na_[cpri]++;
  773         }
  774 
  775         if (qlen(cl->q_) > qlimit(cl->q_)) {
  776                 /* note: qlimit can be set to 0 or 1 */
  777                 rmc_drop_action(cl);
  778                 return (-1);
  779         }
  780         return (0);
  781 }
  782 
  783 /*
  784  * void
  785  * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
  786  *      classes to see if they are satisfied.
  787  */
  788 
  789 static void
  790 rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
  791 {
  792         int              i;
  793         rm_class_t      *p, *bp;
  794 
  795         for (i = RM_MAXPRIO - 1; i >= 0; i--) {
  796                 if ((bp = ifd->active_[i]) != NULL) {
  797                         p = bp;
  798                         do {
  799                                 if (!rmc_satisfied(p, now)) {
  800                                         ifd->cutoff_ = p->depth_;
  801                                         return;
  802                                 }
  803                                 p = p->peer_;
  804                         } while (p != bp);
  805                 }
  806         }
  807 
  808         reset_cutoff(ifd);
  809 }
  810 
  811 /*
  812  * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
  813  */
  814 
  815 static int
  816 rmc_satisfied(struct rm_class *cl, struct timeval *now)
  817 {
  818         rm_class_t      *p;
  819 
  820         if (cl == NULL)
  821                 return (1);
  822         if (TV_LT(now, &cl->undertime_))
  823                 return (1);
  824         if (cl->depth_ == 0) {
  825                 if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
  826                         return (0);
  827                 else
  828                         return (1);
  829         }
  830         if (cl->children_ != NULL) {
  831                 p = cl->children_;
  832                 while (p != NULL) {
  833                         if (!rmc_satisfied(p, now))
  834                                 return (0);
  835                         p = p->next_;
  836                 }
  837         }
  838 
  839         return (1);
  840 }
  841 
  842 /*
  843  * Return 1 if class 'cl' is under limit or can borrow from a parent,
  844  * 0 if overlimit.  As a side-effect, this routine will invoke the
  845  * class overlimit action if the class is overlimit.
  846  */
  847 
  848 static int
  849 rmc_under_limit(struct rm_class *cl, struct timeval *now)
  850 {
  851         rm_class_t      *p = cl;
  852         rm_class_t      *top;
  853         struct rm_ifdat *ifd = cl->ifdat_;
  854 
  855         ifd->borrowed_[ifd->qi_] = NULL;
  856         /*
  857          * If cl is the root class, then always return that it is
  858          * underlimit.  Otherwise, check to see if the class is underlimit.
  859          */
  860         if (cl->parent_ == NULL)
  861                 return (1);
  862 
  863         if (cl->sleeping_) {
  864                 if (TV_LT(now, &cl->undertime_))
  865                         return (0);
  866 
  867                 CALLOUT_STOP(&cl->callout_);
  868                 cl->sleeping_ = 0;
  869                 cl->undertime_.tv_sec = 0;
  870                 return (1);
  871         }
  872 
  873         top = NULL;
  874         while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
  875                 if (((cl = cl->borrow_) == NULL) ||
  876                     (cl->depth_ > ifd->cutoff_)) {
  877 #ifdef ADJUST_CUTOFF
  878                         if (cl != NULL)
  879                                 /* cutoff is taking effect, just
  880                                    return false without calling
  881                                    the delay action. */
  882                                 return (0);
  883 #endif
  884 #ifdef BORROW_OFFTIME
  885                         /*
  886                          * check if the class can borrow offtime too.
  887                          * borrow offtime from the top of the borrow
  888                          * chain if the top class is not overloaded.
  889                          */
  890                         if (cl != NULL) {
  891                                 /* cutoff is taking effect, use this class as top. */
  892                                 top = cl;
  893                                 CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
  894                         }
  895                         if (top != NULL && top->avgidle_ == top->minidle_)
  896                                 top = NULL;
  897                         p->overtime_ = *now;
  898                         (p->overlimit)(p, top);
  899 #else
  900                         p->overtime_ = *now;
  901                         (p->overlimit)(p, NULL);
  902 #endif
  903                         return (0);
  904                 }
  905                 top = cl;
  906         }
  907 
  908         if (cl != p)
  909                 ifd->borrowed_[ifd->qi_] = cl;
  910         return (1);
  911 }
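
/*
 * Editor's illustration (not from the original authors): if a depth-0
 * leaf is overlimit, the loop above walks its borrow_ chain looking
 * for an underlimit ancestor whose depth does not exceed cutoff_.
 * With cutoff_ = 1, an underlimit depth-1 parent lets the leaf send
 * (and is recorded in borrowed_[]); an underlimit ancestor only at
 * depth 2 does not, and the routine returns 0 without invoking the
 * delay action (ADJUST_CUTOFF), so the scheduler may retry with a
 * larger cutoff.
 */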
  912 
  913 /*
  914  * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
  915  *      Packet-by-packet round robin.
  916  *
  917  * The heart of the weighted round-robin scheduler, which decides which
  918  * class next gets to send a packet.  Highest priority first, then
  919  * weighted round-robin within priorities.
  920  *
  921  * Each able-to-send class gets to send until its byte allocation is
  922  * exhausted.  Thus, the active pointer is only changed after a class has
  923  * exhausted its allocation.
  924  *
  925  * If the scheduler finds no class that is underlimit or able to borrow,
  926  * then the first class found that had a nonzero queue and is allowed to
  927  * borrow gets to send.
  928  */
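
/*
 * Editor's illustration of the deficit pass (assumed numbers): with
 * w_allotment_ = 500 bytes and 1500-byte packets, a class that just
 * sent sits at bytes_alloc_ = -1000; the refill brings it only to
 * -500, so the first pass skips it (deficit = 1) and the second pass
 * at the same priority (deficit == 2) lets it send anyway, so it is
 * not starved within its priority level.
 */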
  929 
  930 static mbuf_t *
  931 _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
  932 {
  933         struct rm_class *cl = NULL, *first = NULL;
  934         u_int            deficit;
  935         int              cpri;
  936         mbuf_t          *m;
  937         struct timeval   now;
  938 
  939         RM_GETTIME(now);
  940 
  941         /*
  942          * if the driver polls the top of the queue and then removes
  943          * the polled packet, we must return the same packet.
  944          */
  945         if (op == ALTDQ_REMOVE && ifd->pollcache_) {
  946                 cl = ifd->pollcache_;
  947                 cpri = cl->pri_;
  948                 if (ifd->efficient_) {
  949                         /* check if this class is overlimit */
  950                         if (cl->undertime_.tv_sec != 0 &&
  951                             rmc_under_limit(cl, &now) == 0)
  952                                 first = cl;
  953                 }
  954                 ifd->pollcache_ = NULL;
  955                 goto _wrr_out;
  956         }
  957         else {
  958                 /* mode == ALTDQ_POLL || pollcache == NULL */
  959                 ifd->pollcache_ = NULL;
  960                 ifd->borrowed_[ifd->qi_] = NULL;
  961         }
  962 #ifdef ADJUST_CUTOFF
  963  _again:
  964 #endif
  965         for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
  966                 if (ifd->na_[cpri] == 0)
  967                         continue;
  968                 deficit = 0;
  969                 /*
  970                  * Loop through twice for a priority level, if some class
  971                  * was unable to send a packet the first round because
  972                  * of the weighted round-robin mechanism.
  973                  * During the second loop at this level, deficit==2.
  974                  * (This second loop is not needed if for every class,
  975                  * "M[cl->pri_])" times "cl->allotment" is greater than
  976                  * the byte size for the largest packet in the class.)
  977                  */
  978  _wrr_loop:
  979                 cl = ifd->active_[cpri];
  980                 ASSERT(cl != NULL);
  981                 do {
  982                         if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
  983                                 cl->bytes_alloc_ += cl->w_allotment_;
  984                         if (!qempty(cl->q_)) {
  985                                 if ((cl->undertime_.tv_sec == 0) ||
  986                                     rmc_under_limit(cl, &now)) {
  987                                         if (cl->bytes_alloc_ > 0 || deficit > 1)
  988                                                 goto _wrr_out;
  989 
  990                                         /* underlimit but no alloc */
  991                                         deficit = 1;
  992 #if 1
  993                                         ifd->borrowed_[ifd->qi_] = NULL;
  994 #endif
  995                                 }
  996                                 else if (first == NULL && cl->borrow_ != NULL)
  997                                         first = cl; /* borrowing candidate */
  998                         }
  999 
 1000                         cl->bytes_alloc_ = 0;
 1001                         cl = cl->peer_;
 1002                 } while (cl != ifd->active_[cpri]);
 1003 
 1004                 if (deficit == 1) {
 1005                         /* first loop found an underlimit class with deficit */
 1006                         /* Loop on same priority level, with new deficit.  */
 1007                         deficit = 2;
 1008                         goto _wrr_loop;
 1009                 }
 1010         }
 1011 
 1012 #ifdef ADJUST_CUTOFF
 1013         /*
 1014          * no underlimit class found.  if cutoff is taking effect,
 1015          * increase cutoff and try again.
 1016          */
 1017         if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 1018                 ifd->cutoff_++;
 1019                 CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
 1020                 goto _again;
 1021         }
 1022 #endif /* ADJUST_CUTOFF */
 1023         /*
 1024          * If LINK_EFFICIENCY is turned on, then the first overlimit
 1025          * class we encounter will send a packet if all the classes
 1026          * of the link-sharing structure are overlimit.
 1027          */
 1028         reset_cutoff(ifd);
 1029         CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
 1030 
 1031         if (!ifd->efficient_ || first == NULL)
 1032                 return (NULL);
 1033 
 1034         cl = first;
 1035         cpri = cl->pri_;
 1036 #if 0   /* too time-consuming for nothing */
 1037         if (cl->sleeping_)
 1038                 CALLOUT_STOP(&cl->callout_);
 1039         cl->sleeping_ = 0;
 1040         cl->undertime_.tv_sec = 0;
 1041 #endif
 1042         ifd->borrowed_[ifd->qi_] = cl->borrow_;
 1043         ifd->cutoff_ = cl->borrow_->depth_;
 1044 
 1045         /*
 1046          * Deque the packet and do the book keeping...
 1047          */
 1048  _wrr_out:
 1049         if (op == ALTDQ_REMOVE) {
 1050                 m = _rmc_getq(cl);
 1051                 if (m == NULL)
 1052                         panic("_rmc_wrr_dequeue_next");
 1053                 if (qempty(cl->q_))
 1054                         ifd->na_[cpri]--;
 1055 
 1056                 /*
 1057                  * Update class statistics and link data.
 1058                  */
 1059                 if (cl->bytes_alloc_ > 0)
 1060                         cl->bytes_alloc_ -= m_pktlen(m);
 1061 
 1062                 if ((cl->bytes_alloc_ <= 0) || first == cl)
 1063                         ifd->active_[cl->pri_] = cl->peer_;
 1064                 else
 1065                         ifd->active_[cl->pri_] = cl;
 1066 
 1067                 ifd->class_[ifd->qi_] = cl;
 1068                 ifd->curlen_[ifd->qi_] = m_pktlen(m);
 1069                 ifd->now_[ifd->qi_] = now;
 1070                 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 1071                 ifd->queued_++;
 1072         } else {
 1073                 /* mode == ALTDQ_POLL */
 1074                 m = _rmc_pollq(cl);
 1075                 ifd->pollcache_ = cl;
 1076         }
 1077         return (m);
 1078 }
 1079 
 1080 /*
 1081  * Dequeue & return next packet from the highest priority class that
 1082  * has a packet to send & has enough allocation to send it.  This
 1083  * routine is called by a driver whenever it needs a new packet to
 1084  * output.
 1085  */
 1086 static mbuf_t *
 1087 _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
 1088 {
 1089         mbuf_t          *m;
 1090         int              cpri;
 1091         struct rm_class *cl, *first = NULL;
 1092         struct timeval   now;
 1093 
 1094         RM_GETTIME(now);
 1095 
 1096         /*
 1097          * if the driver polls the top of the queue and then removes
 1098          * the polled packet, we must return the same packet.
 1099          */
 1100         if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 1101                 cl = ifd->pollcache_;
 1102                 cpri = cl->pri_;
 1103                 ifd->pollcache_ = NULL;
 1104                 goto _prr_out;
 1105         } else {
 1106                 /* mode == ALTDQ_POLL || pollcache == NULL */
 1107                 ifd->pollcache_ = NULL;
 1108                 ifd->borrowed_[ifd->qi_] = NULL;
 1109         }
 1110 #ifdef ADJUST_CUTOFF
 1111  _again:
 1112 #endif
 1113         for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 1114                 if (ifd->na_[cpri] == 0)
 1115                         continue;
 1116                 cl = ifd->active_[cpri];
 1117                 ASSERT(cl != NULL);
 1118                 do {
 1119                         if (!qempty(cl->q_)) {
 1120                                 if ((cl->undertime_.tv_sec == 0) ||
 1121                                     rmc_under_limit(cl, &now))
 1122                                         goto _prr_out;
 1123                                 if (first == NULL && cl->borrow_ != NULL)
 1124                                         first = cl;
 1125                         }
 1126                         cl = cl->peer_;
 1127                 } while (cl != ifd->active_[cpri]);
 1128         }
 1129 
 1130 #ifdef ADJUST_CUTOFF
 1131         /*
 1132          * no underlimit class found.  if cutoff is taking effect, increase
 1133          * cutoff and try again.
 1134          */
 1135         if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 1136                 ifd->cutoff_++;
 1137                 goto _again;
 1138         }
 1139 #endif /* ADJUST_CUTOFF */
 1140         /*
 1141          * If LINK_EFFICIENCY is turned on, then the first overlimit
 1142          * class we encounter will send a packet if all the classes
 1143          * of the link-sharing structure are overlimit.
 1144          */
 1145         reset_cutoff(ifd);
 1146         if (!ifd->efficient_ || first == NULL)
 1147                 return (NULL);
 1148 
 1149         cl = first;
 1150         cpri = cl->pri_;
 1151 #if 0   /* too time-consuming for nothing */
 1152         if (cl->sleeping_)
 1153                 CALLOUT_STOP(&cl->callout_);
 1154         cl->sleeping_ = 0;
 1155         cl->undertime_.tv_sec = 0;
 1156 #endif
 1157         ifd->borrowed_[ifd->qi_] = cl->borrow_;
 1158         ifd->cutoff_ = cl->borrow_->depth_;
 1159 
 1160         /*
 1161          * Deque the packet and do the book keeping...
 1162          */
 1163  _prr_out:
 1164         if (op == ALTDQ_REMOVE) {
 1165                 m = _rmc_getq(cl);
 1166                 if (m == NULL)
 1167                         panic("_rmc_prr_dequeue_next");
 1168                 if (qempty(cl->q_))
 1169                         ifd->na_[cpri]--;
 1170 
 1171                 ifd->active_[cpri] = cl->peer_;
 1172 
 1173                 ifd->class_[ifd->qi_] = cl;
 1174                 ifd->curlen_[ifd->qi_] = m_pktlen(m);
 1175                 ifd->now_[ifd->qi_] = now;
 1176                 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 1177                 ifd->queued_++;
 1178         } else {
 1179                 /* mode == ALTDQ_POLL */
 1180                 m = _rmc_pollq(cl);
 1181                 ifd->pollcache_ = cl;
 1182         }
 1183         return (m);
 1184 }
 1185 
 1186 /*
 1187  * mbuf_t *
 1188  * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 1189  *      is invoked by the packet driver to get the next packet to be
 1190  *      dequeued and output on the link.  If WRR is enabled, then the
 1191  *      WRR dequeue next routine will determine the next packet to be sent.
 1192  *      Otherwise, packet-by-packet round robin is invoked.
 1193  *
 1194  *      Returns:        NULL, if a packet is not available or if all
 1195  *                      classes are overlimit.
 1196  *
 1197  *                      Otherwise, Pointer to the next packet.
 1198  */
 1199 
 1200 mbuf_t *
 1201 rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
 1202 {
 1203         if (ifd->queued_ >= ifd->maxqueued_)
 1204                 return (NULL);
 1205         else if (ifd->wrr_)
 1206                 return (_rmc_wrr_dequeue_next(ifd, mode));
 1207         else
 1208                 return (_rmc_prr_dequeue_next(ifd, mode));
 1209 }
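
#if 0
/*
 * Editor's sketch of the driver-side poll/remove pattern that the
 * pollcache_ logic in the dequeue routines supports (driver_has_room
 * and sc are hypothetical):
 */
	m = rmc_dequeue_next(ifd, ALTDQ_POLL);		/* peek only */
	if (m != NULL && driver_has_room(sc, m_pktlen(m)))
		m = rmc_dequeue_next(ifd, ALTDQ_REMOVE);	/* same packet */
#endif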
 1210 
 1211 /*
 1212  * Update the utilization estimate for the packet that just completed.
 1213  * The packet's class & the parent(s) of that class all get their
 1214  * estimators updated.  This routine is called by the driver's output-
 1215  * packet-completion interrupt service routine.
 1216  */
 1217 
 1218 /*
 1219  * a macro to approximate "divide by 1000"; it multiplies by roughly
 1220  * 0.000999 when the value has enough effective digits.
 1221  * (on pentium, mul takes 9 cycles but div takes 46!)
 1222  */
 1223 #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
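
/*
 * Editor's check of the approximation above (illustrative input):
 * 2^-10 + 2^-16 + 2^-17 ~= 0.00099945.  For t = 1000000 ns the macro
 * yields 976 + 15 + 7 = 998 us against an exact 1000 us, about 0.1%
 * low, which is ample for these estimators.
 */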
 1224 void
 1225 rmc_update_class_util(struct rm_ifdat *ifd)
 1226 {
 1227         int              idle, avgidle, pktlen;
 1228         int              pkt_time, tidle;
 1229         rm_class_t      *cl, *borrowed;
 1230         rm_class_t      *borrows;
 1231         struct timeval  *nowp;
 1232 
 1233         /*
 1234          * Get the most recent completed class.
 1235          */
 1236         if ((cl = ifd->class_[ifd->qo_]) == NULL)
 1237                 return;
 1238 
 1239         pktlen = ifd->curlen_[ifd->qo_];
 1240         borrowed = ifd->borrowed_[ifd->qo_];
 1241         borrows = borrowed;
 1242 
 1243         PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 1244 
 1245         /*
 1246          * Run estimator on class and its ancestors.
 1247          */
 1248         /*
 1249          * rm_update_class_util is designed to be called when the
 1250          * transfer is completed from a xmit complete interrupt,
 1251          * but most drivers don't implement an upcall for that.
 1252          * so, just use estimated completion time.
 1253          * as a result, ifd->qi_ and ifd->qo_ are always synced.
 1254          */
 1255         nowp = &ifd->now_[ifd->qo_];
 1256         /* get pkt_time (for link) in usec */
 1257 #if 1  /* use approximation */
 1258         pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
 1259         pkt_time = NSEC_TO_USEC(pkt_time);
 1260 #else
 1261         pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
 1262 #endif
 1263 #if 1 /* ALTQ4PPP */
 1264         if (TV_LT(nowp, &ifd->ifnow_)) {
 1265                 int iftime;
 1266 
 1267                 /*
 1268                  * make sure the estimated completion time does not go
 1269                  * too far.  it can happen when the link layer supports
 1270                  * data compression or the interface speed is set to
 1271                  * a much lower value.
 1272                  */
 1273                 TV_DELTA(&ifd->ifnow_, nowp, iftime);
 1274                 if (iftime+pkt_time < ifd->maxiftime_) {
 1275                         TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 1276                 } else {
 1277                         TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
 1278                 }
 1279         } else {
 1280                 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 1281         }
 1282 #else
 1283         if (TV_LT(nowp, &ifd->ifnow_)) {
 1284                 TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 1285         } else {
 1286                 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 1287         }
 1288 #endif
 1289 
 1290         while (cl != NULL) {
 1291                 TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
 1292                 if (idle >= 2000000)
 1293                         /*
 1294                          * this class is idle enough, reset avgidle.
 1295                          * (TV_DELTA returns 2000000 us when delta is large.)
 1296                          */
 1297                         cl->avgidle_ = cl->maxidle_;
 1298 
 1299                 /* get pkt_time (for class) in usec */
 1300 #if 1  /* use approximation */
 1301                 pkt_time = pktlen * cl->ns_per_byte_;
 1302                 pkt_time = NSEC_TO_USEC(pkt_time);
 1303 #else
 1304                 pkt_time = pktlen * cl->ns_per_byte_ / 1000;
 1305 #endif
 1306                 idle -= pkt_time;
 1307 
 1308                 avgidle = cl->avgidle_;
 1309                 avgidle += idle - (avgidle >> RM_FILTER_GAIN);
 1310                 cl->avgidle_ = avgidle;
 1311 
 1312                 /* Are we overlimit ? */
 1313                 if (avgidle <= 0) {
 1314                         CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
 1315 #if 1 /* ALTQ */
 1316                         /*
 1317                          * need some lower bound for avgidle, otherwise
 1318                          * a borrowing class gets unbounded penalty.
 1319                          */
 1320                         if (avgidle < cl->minidle_)
 1321                                 avgidle = cl->avgidle_ = cl->minidle_;
 1322 #endif
 1323                         /* set next idle to make avgidle 0 */
 1324                         tidle = pkt_time +
 1325                                 (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
 1326                         TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
 1327                         ++cl->stats_.over;
 1328                 } else {
 1329                         cl->avgidle_ =
 1330                             (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
 1331                         cl->undertime_.tv_sec = 0;
 1332                         if (cl->sleeping_) {
 1333                                 CALLOUT_STOP(&cl->callout_);
 1334                                 cl->sleeping_ = 0;
 1335                         }
 1336                 }
 1337 
 1338                 if (borrows != NULL) {
 1339                         if (borrows != cl)
 1340                                 ++cl->stats_.borrows;
 1341                         else
 1342                                 borrows = NULL;
 1343                 }
 1344                 cl->last_ = ifd->ifnow_;
 1345                 cl->last_pkttime_ = pkt_time;
 1346 
 1347 #if 1
 1348                 if (cl->parent_ == NULL) {
 1349                         /* take stats of root class */
 1350                         PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 1351                 }
 1352 #endif
 1353 
 1354                 cl = cl->parent_;
 1355         }
 1356 
 1357         /*
 1358          * Check to see if cutoff needs to be set to a new level.
 1359          */
 1360         cl = ifd->class_[ifd->qo_];
 1361         if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
 1362 #if 1 /* ALTQ */
 1363                 if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
 1364                         rmc_tl_satisfied(ifd, nowp);
 1365                         CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 1366                 } else {
 1367                         ifd->cutoff_ = borrowed->depth_;
 1368                         CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 1369                 }
 1370 #else /* !ALTQ */
 1371                 if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
 1372                         reset_cutoff(ifd);
 1373 #ifdef notdef
 1374                         rmc_tl_satisfied(ifd, &now);
 1375 #endif
 1376                         CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 1377                 } else {
 1378                         ifd->cutoff_ = borrowed->depth_;
 1379                         CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 1380                 }
 1381 #endif /* !ALTQ */
 1382         }
 1383 
 1384         /*
 1385          * Release class slot
 1386          */
 1387         ifd->borrowed_[ifd->qo_] = NULL;
 1388         ifd->class_[ifd->qo_] = NULL;
 1389         ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
 1390         ifd->queued_--;
 1391 }
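
/*
 * Editor's note on the filter above (illustrative numbers): the update
 * avgidle += idle - (avgidle >> RM_FILTER_GAIN) keeps avgidle scaled
 * up by 2^RM_FILTER_GAIN, i.e. it converges toward 32 * idle for
 * RM_FILTER_GAIN = 5.  A class that keeps transmitting with idle = -100
 * (sending faster than its allocation) therefore drifts toward
 * avgidle = -3200 until the minidle_ clamp or the overlimit branch
 * takes effect.
 */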
 1392 
 1393 /*
 1394  * void
 1395  * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 1396  *      over-limit action routines.  These get invoked by rmc_under_limit()
 1397  *      if a class with packets to send is over its bandwidth limit & can't
 1398  *      borrow from a parent class.
 1399  *
 1400  *      Returns: NONE
 1401  */
 1402 
 1403 static void
 1404 rmc_drop_action(struct rm_class *cl)
 1405 {
 1406         struct rm_ifdat *ifd = cl->ifdat_;
 1407 
 1408         ASSERT(qlen(cl->q_) > 0);
 1409         _rmc_dropq(cl);
 1410         if (qempty(cl->q_))
 1411                 ifd->na_[cl->pri_]--;
 1412 }
 1413 
 1414 void rmc_dropall(struct rm_class *cl)
 1415 {
 1416         struct rm_ifdat *ifd = cl->ifdat_;
 1417 
 1418         if (!qempty(cl->q_)) {
 1419                 _flushq(cl->q_);
 1420 
 1421                 ifd->na_[cl->pri_]--;
 1422         }
 1423 }
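/*
 * Both drop paths above keep the active-class accounting consistent:
 * ifd->na_[pri] appears to count the classes with packets queued at
 * each priority, so it is decremented exactly when a class queue goes
 * from non-empty to empty.
 */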
 1424 
 1425 /*
 1426  * void
 1427  * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 1428  *      delay action routine.  It is invoked via rmc_under_limit when a
 1429  *      class is discovered to be overlimit.
 1430  *
 1431  *      If the delay action is the result of the borrowed-from class being
 1432  *      overlimit, then delay for the offtime of that overlimit class.
 1433  *
 1434  *      Returns: NONE
 1435  */
 1436 
 1437 void
 1438 rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
 1439 {
 1440         int     delay, t, extradelay;
 1441 
 1442         cl->stats_.overactions++;
 1443         TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
 1444 #ifndef BORROW_OFFTIME
 1445         delay += cl->offtime_;
 1446 #endif
 1447 
 1448         if (!cl->sleeping_) {
 1449                 CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
 1450 #ifdef BORROW_OFFTIME
 1451                 if (borrow != NULL)
 1452                         extradelay = borrow->offtime_;
 1453                 else
 1454 #endif
 1455                         extradelay = cl->offtime_;
 1456 
 1457 #ifdef ALTQ
 1458                 /*
 1459                  * XXX recalculate suspend time:
 1460                  * current undertime is (tidle + pkt_time) calculated
 1461                  * from the last transmission.
 1462                  *      tidle: time required to bring avgidle back to 0
 1463                  *      pkt_time: target waiting time for this class
 1464                  * we need to replace pkt_time by offtime
 1465                  */
 1466                 extradelay -= cl->last_pkttime_;
 1467 #endif
 1468                 if (extradelay > 0) {
 1469                         TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
 1470                         delay += extradelay;
 1471                 }
 1472 
 1473                 cl->sleeping_ = 1;
 1474                 cl->stats_.delays++;
 1475 
 1476                 /*
 1477                  * Since packets are phased randomly with respect to the
 1478                  * clock, 1 tick (the next clock tick) can be an arbitrarily
 1479                  * short time so we have to wait for at least two ticks.
 1480                  * NOTE:  If there's no other traffic, we need the timer as
 1481                  * a 'backstop' to restart this class.
 1482                  */
 1483                 if (delay > tick * 2) {
 1484 #ifdef __FreeBSD__
 1485                         /* FreeBSD rounds up the tick */
 1486                         t = hzto(&cl->undertime_);
 1487 #else
 1488                         /* other BSDs round down the tick */
 1489                         t = hzto(&cl->undertime_) + 1;
 1490 #endif
 1491                 } else
 1492                         t = 2;
 1493                 CALLOUT_RESET(&cl->callout_, t,
 1494                               (timeout_t *)rmc_restart, (caddr_t)cl);
 1495         }
 1496 }
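/*
 * Illustration of the two-tick floor above (assuming hz = 100, i.e.
 * tick = 10000 usec): a computed delay of 15000 usec is below the
 * 20000 usec floor, so the callout is armed with t = 2 ticks; a
 * 50000 usec delay instead goes through hzto(&cl->undertime_),
 * roughly delay / tick = 5 ticks, plus one on the BSDs that round
 * the tick down.
 */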
 1497 
 1498 /*
 1499  * void
 1500  * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 1501  *      called by the system timer code & is responsible for checking if the
 1502  *      class is still sleeping (it might have been restarted as a side
 1503  *      effect of the queue scan on a packet arrival) and, if so, restarting
 1504  *      output for the class.  Inspecting the class state & restarting output
 1505  *      require locking the class structure.  In general the driver is
 1506  *      responsible for locking but this is the only routine that is not
 1507  *      called directly or indirectly from the interface driver so it has to
 1508  *      know about system locking conventions.  Under BSD, locking is done
 1509  *      by raising IPL to splnet so that's what's implemented here.  On a
 1510  *      different system this would probably need to be changed.
 1511  *
 1512  *      Returns:        NONE
 1513  */
 1514 
 1515 static void
 1516 rmc_restart(struct rm_class *cl)
 1517 {
 1518         struct rm_ifdat *ifd = cl->ifdat_;
 1519         int              s;
 1520 
 1521         s = splnet();
 1522         if (cl->sleeping_) {
 1523                 cl->sleeping_ = 0;
 1524                 cl->undertime_.tv_sec = 0;
 1525 
 1526                 if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
 1527                         CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
 1528                         (ifd->restart)(ifd->ifq_);
 1529                 }
 1530         }
 1531         splx(s);
 1532 }
 1533 
 1534 /*
 1535  * void
 1536  * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 1537  *      handling routine for the root class of the link sharing structure.
 1538  *
 1539  *      Returns: NONE
 1540  */
 1541 
 1542 static void
 1543 rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
 1544 {
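        /*
         * The root class has no parent to borrow from, so its
         * overlimit action is never expected to run; reaching this
         * point indicates a broken link-sharing structure.
         */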
 1545         panic("rmc_root_overlimit");
 1546 }
 1547 
 1548 /*
 1549  * Packet Queue handling routines.  Eventually, these are to localize
 1550  *      the effects on the code of whether queues are RED queues or
 1551  *      drop-tail queues.
 1552  */
 1553 
 1554 static int
 1555 _rmc_addq(rm_class_t *cl, mbuf_t *m)
 1556 {
 1557 #ifdef ALTQ_RIO
 1558         if (q_is_rio(cl->q_))
 1559                 return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
 1560 #endif
 1561 #ifdef ALTQ_RED
 1562         if (q_is_red(cl->q_))
 1563                 return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
 1564 #endif /* ALTQ_RED */
 1565 
 1566         if (cl->flags_ & RMCF_CLEARDSCP)
 1567                 write_dsfield(m, cl->pktattr_, 0);
 1568 
 1569         _addq(cl->q_, m);
 1570         return (0);
 1571 }
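/*
 * Usage sketch (an assumption about the caller, rmc_queue_packet,
 * which is defined earlier in this file): enqueue paths are expected
 * to check the return value, since the RED/RIO hooks may drop and
 * free the mbuf themselves, which is assumed to be reported as a
 * negative return:
 *
 *	if (_rmc_addq(cl, m) < 0)
 *		return (-1);
 *
 * Only the drop-tail path falls through to _addq(), which cannot fail.
 */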
 1572 
 1573 /* note: _rmc_dropq is not called for RED */
 1574 static void
 1575 _rmc_dropq(rm_class_t *cl)
 1576 {
 1577         mbuf_t  *m;
 1578 
 1579         if ((m = _getq(cl->q_)) != NULL)
 1580                 m_freem(m);
 1581 }
 1582 
 1583 static mbuf_t *
 1584 _rmc_getq(rm_class_t *cl)
 1585 {
 1586 #ifdef ALTQ_RIO
 1587         if (q_is_rio(cl->q_))
 1588                 return rio_getq((rio_t *)cl->red_, cl->q_);
 1589 #endif
 1590 #ifdef ALTQ_RED
 1591         if (q_is_red(cl->q_))
 1592                 return red_getq(cl->red_, cl->q_);
 1593 #endif
 1594         return _getq(cl->q_);
 1595 }
 1596 
 1597 static mbuf_t *
 1598 _rmc_pollq(rm_class_t *cl)
 1599 {
 1600         return qhead(cl->q_);
 1601 }
 1602 
 1603 #ifdef CBQ_TRACE
 1604 
 1605 struct cbqtrace          cbqtrace_buffer[NCBQTRACE+1];
 1606 struct cbqtrace         *cbqtrace_ptr = NULL;
 1607 int                      cbqtrace_count;
 1608 
 1609 /*
 1610  * DDB hook to trace cbq events:
 1611  *  the last 1024 events are held in a circular buffer.
 1612  *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 1613  */
 1614 void cbqtrace_dump(int);
 1615 static char *rmc_funcname(void *);
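/*
 * cbqtrace_dump() below walks the buffer as a flat array of ints, so
 * each record is read back as four int-sized fields: a sequence
 * count, the address of the recording function, a 4-character action
 * tag (e.g. 'yled', 'trts'), and the object handle.  Note the walk
 * assumes every field of struct cbqtrace is int-sized, which holds on
 * ILP32 but not where pointers are wider than int.
 */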
 1616 
 1617 static struct rmc_funcs {
 1618         void    *func;
 1619         char    *name;
 1620 } rmc_funcs[] =
 1621 {
 1622         { rmc_init,             "rmc_init" },
 1623         { rmc_queue_packet,     "rmc_queue_packet" },
 1624         { rmc_under_limit,      "rmc_under_limit" },
 1625         { rmc_update_class_util, "rmc_update_class_util" },
 1626         { rmc_delay_action,     "rmc_delay_action" },
 1627         { rmc_restart,          "rmc_restart" },
 1628         { _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next" },
 1629         { NULL,                 NULL }
 1630 };
 1631 
 1632 static char *rmc_funcname(void *func)
 1633 {
 1634         struct rmc_funcs *fp;
 1635 
 1636         for (fp = rmc_funcs; fp->func != NULL; fp++)
 1637                 if (fp->func == func)
 1638                         return (fp->name);
 1639         return ("unknown");
 1640 }
 1641 
 1642 void cbqtrace_dump(int counter)
 1643 {
 1644         int      i, *p;
 1645         char    *cp;
 1646 
 1647         counter = counter % NCBQTRACE;
 1648         p = (int *)&cbqtrace_buffer[counter];
 1649 
 1650         for (i = 0; i < 20; i++) {
 1651                 printf("[0x%x] ", *p++);
 1652                 printf("%s: ", rmc_funcname((void *)*p++));
 1653                 cp = (char *)p++;
 1654                 printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
 1655                 printf("%d\n", *p++);
 1656 
 1657                 if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
 1658                         p = (int *)cbqtrace_buffer;
 1659         }
 1660 }
 1661 #endif /* CBQ_TRACE */
 1662 
 1663 #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
 1664 #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
 1665 
 1666 void
 1667 _addq(class_queue_t *q, mbuf_t *m)
 1668 {
 1669         mbuf_t  *m0;
 1670 
 1671         if ((m0 = qtail(q)) != NULL)
 1672                 m->m_nextpkt = m0->m_nextpkt;
 1673         else
 1674                 m0 = m;
 1675         m0->m_nextpkt = m;
 1676         qtail(q) = m;
 1677         qlen(q)++;
 1678 }
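/*
 * The class queues are circular singly-linked lists threaded through
 * m_nextpkt: qtail(q) points at the last packet and the last packet's
 * m_nextpkt points back at the head.  After enqueueing A, B, C in
 * that order:
 *
 *	qtail(q) -> C -> A -> B -> C -> ...
 *
 * so _addq() links a new tail in O(1) and _getq() below pops the head
 * in O(1) via qtail(q)->m_nextpkt.
 */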
 1679 
 1680 mbuf_t *
 1681 _getq(class_queue_t *q)
 1682 {
 1683         mbuf_t  *m, *m0;
 1684 
 1685         if ((m = qtail(q)) == NULL)
 1686                 return (NULL);
 1687         if ((m0 = m->m_nextpkt) != m)
 1688                 m->m_nextpkt = m0->m_nextpkt;
 1689         else {
 1690                 ASSERT(qlen(q) == 1);
 1691                 qtail(q) = NULL;
 1692         }
 1693         qlen(q)--;
 1694         m0->m_nextpkt = NULL;
 1695         return (m0);
 1696 }
 1697 
 1698 /* drop a packet at the tail of the queue */
 1699 mbuf_t *
 1700 _getq_tail(class_queue_t *q)
 1701 {
 1702         mbuf_t  *m, *m0, *prev;
 1703 
 1704         if ((m = m0 = qtail(q)) == NULL)
 1705                 return NULL;
 1706         do {
 1707                 prev = m0;
 1708                 m0 = m0->m_nextpkt;
 1709         } while (m0 != m);
 1710         prev->m_nextpkt = m->m_nextpkt;
 1711         if (prev == m)  {
 1712                 ASSERT(qlen(q) == 1);
 1713                 qtail(q) = NULL;
 1714         } else
 1715                 qtail(q) = prev;
 1716         qlen(q)--;
 1717         m->m_nextpkt = NULL;
 1718         return (m);
 1719 }
 1720 
 1721 /* randomly select a packet in the queue */
 1722 mbuf_t *
 1723 _getq_random(class_queue_t *q)
 1724 {
 1725         struct mbuf     *m;
 1726         int              i, n;
 1727 
 1728         if ((m = qtail(q)) == NULL)
 1729                 return NULL;
 1730         if (m->m_nextpkt == m) {
 1731                 ASSERT(qlen(q) == 1);
 1732                 qtail(q) = NULL;
 1733         } else {
 1734                 struct mbuf *prev = NULL;
 1735 
 1736                 n = random() % qlen(q) + 1;
 1737                 for (i = 0; i < n; i++) {
 1738                         prev = m;
 1739                         m = m->m_nextpkt;
 1740                 }
 1741                 prev->m_nextpkt = m->m_nextpkt;
 1742                 if (m == qtail(q))
 1743                         qtail(q) = prev;
 1744         }
 1745         qlen(q)--;
 1746         m->m_nextpkt = NULL;
 1747         return (m);
 1748 }
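/*
 * n is uniform over [1, qlen(q)] (up to the slight bias of the
 * modulus) and the walk starts at the tail, so n == 1 selects the
 * head and n == qlen(q) selects the tail; each queued packet is
 * picked with roughly equal probability.
 */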
 1749 
 1750 void
 1751 _removeq(class_queue_t *q, mbuf_t *m)
 1752 {
 1753         mbuf_t  *m0, *prev;
 1754 
 1755         m0 = qtail(q);
 1756         do {
 1757                 prev = m0;
 1758                 m0 = m0->m_nextpkt;
 1759         } while (m0 != m);
 1760         prev->m_nextpkt = m->m_nextpkt;
 1761         if (prev == m)
 1762                 qtail(q) = NULL;
 1763         else if (qtail(q) == m)
 1764                 qtail(q) = prev;
 1765         qlen(q)--;
 1766 }
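/*
 * _removeq() assumes m is actually on q: the list is circular, so the
 * do/while above would never terminate otherwise.  Callers must also
 * guarantee the queue is non-empty (qtail(q) != NULL).
 */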
 1767 
 1768 void
 1769 _flushq(class_queue_t *q)
 1770 {
 1771         mbuf_t *m;
 1772 
 1773         while ((m = _getq(q)) != NULL)
 1774                 m_freem(m);
 1775         ASSERT(qlen(q) == 0);
 1776 }
 1777 
 1778 #endif /* !__GNUC__ || ALTQ_DEBUG */
 1779 #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
