/*	$OpenBSD: altq_rmclass.c,v 1.12 2006/03/04 22:40:15 brad Exp $	*/
/*	$KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $	*/

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>

#include <net/if.h>

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int	_rmc_addq(rm_class_t *, mbuf_t *);
static void	_rmc_dropq(rm_class_t *);
static mbuf_t	*_rmc_getq(rm_class_t *);
static mbuf_t	*_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowed from.
 * The reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * But when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * Also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * The heuristics for setting the top-level variable (cutoff_) now become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */
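
/*
 * Illustrative walk-through of the heuristics above (added commentary,
 * not from the original text): take a three-level tree with the root at
 * depth 2, interior classes at depth 1, and leaves at depth 0, and
 * cutoff_ currently 2.  A packet arriving for an underlimit leaf drops
 * cutoff_ to 0 (rule 1), disabling borrowing.  A packet arriving for an
 * overlimit leaf whose depth-1 ancestor is underlimit sets cutoff_ to 1
 * (rule 2), so that leaf may borrow, but only from depth <= 1.  And when
 * the scheduler finds nothing underlimit at the current cutoff, it
 * raises cutoff_ one level at a time and rescans (rule 3).
 */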

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
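
/*
 * Worked example (illustrative numbers, not from the original text): for
 * a 10Mb/s ethernet (nsPerByte = 800), give a class half the link
 * (f = 0.5, so nsecPerByte = 1600).  With an average packet size of
 * s = 1000 bytes, a burst allowance of b = 16 packets, and assuming
 * RM_FILTER_GAIN = 5 (g = 1 - 2^-5 = 0.96875):
 *
 *	ptime   = 1000 * 800 * (1 - 0.5) / 0.5 = 800000 ns
 *	g^b     = 0.96875^16 ~= 0.60
 *	maxidle = 800000 * (1 - 0.60) / 0.60 ~= 530000 ns
 *
 * so the scaled value handed to this routine would be roughly
 * maxidle * 2^5 * 8 / (1000 * 1600) ~= 85.
 */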

struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class *cl;
	struct rm_class *peer;
	int s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
	    M_DEVBUF, M_WAITOK);
	if (cl == NULL)
		return (NULL);
	bzero(cl, sizeof(struct rm_class));
	CALLOUT_INIT(&cl->callout_);
	MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
	    M_DEVBUF, M_WAITOK);
	if (cl->q_ == NULL) {
		FREE(cl, M_DEVBUF);
		return (NULL);
	}
	bzero(cl->q_, sizeof(class_queue_t));

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize)
{
	struct rm_ifdat *ifd;
	u_int old_allotment;
	int s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round-robin weights for the CBQ weighted
 *	round-robin algorithm.
 *
 *	Returns: NONE
 */

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int i;
	struct rm_class *cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
			    (ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}
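
/*
 * Worked example (illustrative numbers, not from the original text):
 * suppose one priority level holds two classes with allotments of
 * 625000 and 312500 bytes/sec (a 10Mb/s link split 50%/25%) and
 * maxpkt_ = 1500.  Then M_[i] = 937500 / (2 * 1500) = 312, giving
 * w_allotment_ values of 625000/312 = 2003 and 312500/312 = 1001, i.e.
 * per-round byte quotas proportional to the allotments.
 */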

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 *	Returns: NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}
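
/*
 * Illustrative note (added commentary): inserting a new leaf (depth 0)
 * under a parent that is also at depth 0 walks one step up and sets the
 * parent's depth_ to 1; inserting under a parent of depth 1 or more
 * changes nothing, since every ancestor is already deeper than its
 * children.
 */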

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 *	Returns: NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t *p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t *t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 *	Returns: NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;
	int s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	FREE(cl->q_, M_DEVBUF);
	FREE(cl, M_DEVBUF);
}


/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 *	Returns: NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob',
					    ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/*
				 * cutoff is taking effect, just
				 * return false without calling
				 * the delay action.
				 */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed to
 *	packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */
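
/*
 * Illustrative scenario (added commentary, not from the original text):
 * a class whose w_allotment_ (say 400 bytes) is smaller than its packet
 * size (say 1500 bytes) can be left with bytes_alloc_ <= 0 even after
 * its allotment is added on the first pass; that pass sets deficit = 1
 * and skips the class.  The second pass then runs with deficit == 2 and
 * lets the class send anyway, so a level with only such classes does
 * not stall.
 */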

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int deficit;
	int cpri;
	mbuf_t *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 ||
					    deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				} else if (first == NULL &&
				    cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, a pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000"; it actually multiplies by
 * roughly 0.000999 when the value has enough significant bits.
 * (on a Pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
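
/*
 * Sanity check of the approximation (added commentary):
 * 1/1024 + 1/65536 + 1/131072 ~= 0.0009995, so for t = 1000000 the
 * macro yields 976 + 15 + 7 = 998 against an exact 1000, an error of
 * about 0.2%.
 */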
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime + pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;
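		/*
		 * Added commentary: avgidle_ holds the idle-time EWMA
		 * scaled up by 2^RM_FILTER_GAIN, so the update above is
		 * the fixed-point form of
		 *	avg = (1 - 2^-RM_FILTER_GAIN) * avg
		 *	      + 2^-RM_FILTER_GAIN * idle
		 * applied to the scaled estimate.
		 */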

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo',
			    cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
			    borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
			    borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns: NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of a borrowed class being
 *	overlimit, then delay for the offtime of the borrowed class that
 *	is overlimit.
 *
 *	Returns: NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int delay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay,
			    &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
		    (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has to
 *	know about system locking conventions.  Under BSD, locking is done
 *	by raising IPL to splnet so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns: NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;
	int s;

	s = splnet();
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link-sharing structure.
 *
 *	Returns: NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, these are to localize
 * the effects on the code of whether queues are RED queues or drop-tail
 * queues.
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t *m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace *cbqtrace_ptr = NULL;
int cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void *func;
	char *name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(int counter)
{
	int i, *p;
	char *cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i = 0; i < 20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n", *p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

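/*
 * Added commentary (sketching the data structure, derived from the code
 * below): a class_queue_t is kept as a circular singly-linked list of
 * mbufs threaded through m_nextpkt.  qtail(q) points at the most
 * recently added packet, and the tail's m_nextpkt points back at the
 * head, so _addq() appends in O(1) and _getq() pops the head in O(1)
 * while only a tail pointer is stored.
 */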
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t *m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t *m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf *m;
	int i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
	mbuf_t *m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */