1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/in.h>
25#include <linux/errno.h>
26#include <linux/interrupt.h>
27#include <linux/if_ether.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/notifier.h>
32#include <net/ip.h>
33#include <net/route.h>
34#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h>
37
38
39/* Class-Based Queueing (CBQ) algorithm.
40 =======================================
41
42 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43		 Management Models for Packet Networks",
44		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46	     [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48	     [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49		 Parameters", 1996
50
51 [4] Sally Floyd and Michael Speer, "Experimental Results
52 for Class-Based Queueing", 1998, not published.
53
54 -----------------------------------------------------------------------
55
56 Algorithm skeleton was taken from NS simulator cbq.cc.
57 If someone wants to check this code against the LBL version,
58	he should take into account that ONLY the skeleton was borrowed;
59	the implementation is different. Particularly:
60
61 --- The WRR algorithm is different. Our version looks more
62	reasonable (I hope) and works when quanta are allowed to be
63 less than MTU, which is always the case when real time classes
64	have small rates. Note that the statement of [3] is
65	incomplete: delay may actually be estimated even if class
66 per-round allotment is less than MTU. Namely, if per-round
67 allotment is W*r_i, and r_1+...+r_k = r < 1
68
69 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71 In the worst case we have IntServ estimate with D = W*r+k*MTU
72 and C = MTU*r. The proof (if correct at all) is trivial.
73
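	 A concrete, purely illustrative instance of this bound (the numbers
	 are ours, not taken from [1]-[4], and [x] is read as the ceiling):
	 with B = 1.25e6 bytes/sec (10 Mbit/s), k = 2 classes, r_1 = 0.05,
	 r_2 = 0.15 (so r = 0.2), W = 20000 bytes and MTU = 1500, class 1
	 has per-round allotment W*r_1 = 1000 < MTU, and the bound gives

	 delay_1 <= (2*20000*0.2 + 20000*0.2 + 2*1500)/1.25e6 = 12 ms.
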
74
75 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76 interpret some places, which look like wrong translations
77 from NS. Anyone is advised to find these differences
78 and explain to me, why I am wrong 8).
79
80 --- Linux has no EOI event, so that we cannot estimate true class
81	idle time. The workaround is to consider the next dequeue event
82	as a sign that the previous packet has finished. This is wrong because of
83 internal device queueing, but on a permanently loaded link it is true.
84 Moreover, combined with clock integrator, this scheme looks
85 very close to an ideal solution. */
86
87struct cbq_sched_data;
88
89
90struct cbq_class
91{
92 struct cbq_class *next; /* hash table link */
93 struct cbq_class *next_alive; /* next class with backlog in this priority band */
94
95/* Parameters */
96 u32 classid;
97 unsigned char priority; /* class priority */
98 unsigned char priority2; /* priority to be used after overlimit */
99 unsigned char ewma_log; /* time constant for idle time calculation */
100 unsigned char ovl_strategy;
101#ifdef CONFIG_NET_CLS_POLICE
102 unsigned char police;
103#endif
104
105 u32 defmap;
106
107 /* Link-sharing scheduler parameters */
108 long maxidle; /* Class parameters: see below. */
109 long offtime;
110 long minidle;
111 u32 avpkt;
112 struct qdisc_rate_table *R_tab;
113
114 /* Overlimit strategy parameters */
115 void (*overlimit)(struct cbq_class *cl);
116 long penalty;
117
118 /* General scheduler (WRR) parameters */
119 long allot;
120 long quantum; /* Allotment per WRR round */
121 long weight; /* Relative allotment: see below */
122
123 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
124 struct cbq_class *split; /* Ptr to split node */
125 struct cbq_class *share; /* Ptr to LS parent in the class tree */
126 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
127 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
128 parent otherwise */
129 struct cbq_class *sibling; /* Sibling chain */
130 struct cbq_class *children; /* Pointer to children chain */
131
132 struct Qdisc *q; /* Elementary queueing discipline */
133
134
135/* Variables */
136 unsigned char cpriority; /* Effective priority */
137 unsigned char delayed;
138 unsigned char level; /* level of the class in hierarchy:
139 0 for leaf classes, and maximal
140 level of children + 1 for nodes.
141 */
142
143 psched_time_t last; /* Last end of service */
144 psched_time_t undertime;
145 long avgidle;
146 long deficit; /* Saved deficit for WRR */
147 unsigned long penalized;
148 struct gnet_stats_basic bstats;
149 struct gnet_stats_queue qstats;
150 struct gnet_stats_rate_est rate_est;
151 spinlock_t *stats_lock;
152 struct tc_cbq_xstats xstats;
153
154 struct tcf_proto *filter_list;
155
156 int refcnt;
157 int filters;
158
159 struct cbq_class *defaults[TC_PRIO_MAX+1];
160};
161
162struct cbq_sched_data
163{
164 struct cbq_class *classes[16]; /* Hash table of all classes */
165 int nclasses[TC_CBQ_MAXPRIO+1];
166 unsigned quanta[TC_CBQ_MAXPRIO+1];
167
168 struct cbq_class link;
169
170 unsigned activemask;
171 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 with backlog */
173
174#ifdef CONFIG_NET_CLS_POLICE
175 struct cbq_class *rx_class;
176#endif
177 struct cbq_class *tx_class;
178 struct cbq_class *tx_borrowed;
179 int tx_len;
180 psched_time_t now; /* Cached timestamp */
181 psched_time_t now_rt; /* Cached real time */
182 unsigned pmask;
183
184 struct timer_list delay_timer;
185 struct timer_list wd_timer; /* Watchdog timer,
186 started when CBQ has
187 backlog, but cannot
188 transmit just now */
189 long wd_expires;
190 int toplevel;
191 u32 hgenerator;
192};
193
194
195#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
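/* Illustrative only; the numbers below are assumptions, not values from any
 * real rate table built by tc. L2T() converts a packet length into
 * transmission time by indexing the precomputed table with len >> cell_log.
 * E.g. with cell_log = 3, a 1000-byte packet selects slot 1000 >> 3 = 125,
 * i.e. the time precomputed for lengths 1000..1007 at the class rate.
 */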
197
198static __inline__ unsigned cbq_hash(u32 h)
199{
200 h ^= h>>8;
201 h ^= h>>4;
202 return h&0xF;
203}
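/* Worked example (our own, for illustration): classid 1:1 is the u32
 * 0x00010001; folding gives 0x00010001 ^ 0x00000100 = 0x00010101, then
 * ^ 0x00001010 = 0x00011111, and & 0xF selects hash bucket 1.
 */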
204
205static __inline__ struct cbq_class *
206cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207{
208 struct cbq_class *cl;
209
210 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211 if (cl->classid == classid)
212 return cl;
213 return NULL;
214}
215
216#ifdef CONFIG_NET_CLS_POLICE
217
218static struct cbq_class *
219cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220{
221 struct cbq_class *cl, *new;
222
223 for (cl = this->tparent; cl; cl = cl->tparent)
224 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225 return new;
226
227 return NULL;
228}
229
230#endif
231
232/* Classify packet. The procedure is pretty complicated, but
233 it allows us to combine link sharing and priority scheduling
234 transparently.
235
236 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
237 so that it resolves to split nodes. Then packets are classified
238 by logical priority, or a more specific classifier may be attached
239 to the split node.
240 */
241
242static struct cbq_class *
243cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244{
245 struct cbq_sched_data *q = qdisc_priv(sch);
246 struct cbq_class *head = &q->link;
247 struct cbq_class **defmap;
248 struct cbq_class *cl = NULL;
249 u32 prio = skb->priority;
250 struct tcf_result res;
251
252 /*
253 * Step 1. If skb->priority points to one of our classes, use it.
254 */
255 if (TC_H_MAJ(prio^sch->handle) == 0 &&
256 (cl = cbq_class_lookup(q, prio)) != NULL)
257 return cl;
258
259	*qerr = NET_XMIT_BYPASS;
260	for (;;) {
261 int result = 0;
262 defmap = head->defaults;
263
264 /*
265 * Step 2+n. Apply classifier.
266 */
267 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 goto fallback;
269
270 if ((cl = (void*)res.class) == NULL) {
271 if (TC_H_MAJ(res.classid))
272 cl = cbq_class_lookup(q, res.classid);
273 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274 cl = defmap[TC_PRIO_BESTEFFORT];
275
276 if (cl == NULL || cl->level >= head->level)
277 goto fallback;
278 }
279
280#ifdef CONFIG_NET_CLS_ACT
281 switch (result) {
282 case TC_ACT_QUEUED:
283		case TC_ACT_STOLEN:
284			*qerr = NET_XMIT_SUCCESS;
285 case TC_ACT_SHOT:
286 return NULL;
287 }
288#elif defined(CONFIG_NET_CLS_POLICE)
289 switch (result) {
290 case TC_POLICE_RECLASSIFY:
291 return cbq_reclassify(skb, cl);
292 case TC_POLICE_SHOT:
293 return NULL;
294 default:
295 break;
296 }
297#endif
298 if (cl->level == 0)
299 return cl;
300
301 /*
302 * Step 3+n. If classifier selected a link sharing class,
303 * apply agency specific classifier.
304 * Repeat this procdure until we hit a leaf node.
305 */
306 head = cl;
307 }
308
309fallback:
310 cl = head;
311
312 /*
313 * Step 4. No success...
314 */
315 if (TC_H_MAJ(prio) == 0 &&
316 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318 return head;
319
320 return cl;
321}
322
323/*
324	A packet has just been enqueued on an empty class.
325	cbq_activate_class adds the class to the tail of the active class list
326 of its priority band.
327 */
328
329static __inline__ void cbq_activate_class(struct cbq_class *cl)
330{
331 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332 int prio = cl->cpriority;
333 struct cbq_class *cl_tail;
334
335 cl_tail = q->active[prio];
336 q->active[prio] = cl;
337
338 if (cl_tail != NULL) {
339 cl->next_alive = cl_tail->next_alive;
340 cl_tail->next_alive = cl;
341 } else {
342 cl->next_alive = cl;
343 q->activemask |= (1<<prio);
344 }
345}
346
347/*
348 Unlink class from active chain.
349 Note that this same procedure is done directly in cbq_dequeue*
350 during round-robin procedure.
351 */
352
353static void cbq_deactivate_class(struct cbq_class *this)
354{
355 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356 int prio = this->cpriority;
357 struct cbq_class *cl;
358 struct cbq_class *cl_prev = q->active[prio];
359
360 do {
361 cl = cl_prev->next_alive;
362 if (cl == this) {
363 cl_prev->next_alive = cl->next_alive;
364 cl->next_alive = NULL;
365
366 if (cl == q->active[prio]) {
367 q->active[prio] = cl_prev;
368 if (cl == q->active[prio]) {
369 q->active[prio] = NULL;
370 q->activemask &= ~(1<<prio);
371 return;
372 }
373 }
374			return;
375 }
376 } while ((cl_prev = cl) != q->active[prio]);
377}
378
379static void
380cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381{
382 int toplevel = q->toplevel;
383
384 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385 psched_time_t now;
386 psched_tdiff_t incr;
387
388 PSCHED_GET_TIME(now);
389 incr = PSCHED_TDIFF(now, q->now_rt);
390 PSCHED_TADD2(q->now, incr, now);
391
392 do {
393 if (PSCHED_TLESS(cl->undertime, now)) {
394 q->toplevel = cl->level;
395 return;
396 }
397 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398 }
399}
400
401static int
402cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct cbq_sched_data *q = qdisc_priv(sch);
405 int len = skb->len;
406 int ret;
407 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409#ifdef CONFIG_NET_CLS_POLICE
410 q->rx_class = cl;
411#endif
412 if (cl == NULL) {
413		if (ret == NET_XMIT_BYPASS)
414			sch->qstats.drops++;
415 kfree_skb(skb);
416 return ret;
417 }
418
419#ifdef CONFIG_NET_CLS_POLICE
420 cl->q->__parent = sch;
421#endif
422 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423 sch->q.qlen++;
424 sch->bstats.packets++;
425 sch->bstats.bytes+=len;
426 cbq_mark_toplevel(q, cl);
427 if (!cl->next_alive)
428 cbq_activate_class(cl);
429 return ret;
430 }
431
432 sch->qstats.drops++;
433 cbq_mark_toplevel(q, cl);
434 cl->qstats.drops++;
435 return ret;
436}
437
438static int
439cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440{
441 struct cbq_sched_data *q = qdisc_priv(sch);
442 struct cbq_class *cl;
443 int ret;
444
445 if ((cl = q->tx_class) == NULL) {
446 kfree_skb(skb);
447 sch->qstats.drops++;
448 return NET_XMIT_CN;
449 }
450 q->tx_class = NULL;
451
452 cbq_mark_toplevel(q, cl);
453
454#ifdef CONFIG_NET_CLS_POLICE
455 q->rx_class = cl;
456 cl->q->__parent = sch;
457#endif
458 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459 sch->q.qlen++;
460 sch->qstats.requeues++;
461 if (!cl->next_alive)
462 cbq_activate_class(cl);
463 return 0;
464 }
465 sch->qstats.drops++;
466 cl->qstats.drops++;
467 return ret;
468}
469
470/* Overlimit actions */
471
472/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474static void cbq_ovl_classic(struct cbq_class *cl)
475{
476 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478
479 if (!cl->delayed) {
480 delay += cl->offtime;
481
482		/*
483		   Class goes to sleep, so that it will have no
484		   chance to work off its avgidle. Let's forgive it 8)
485
486		   BTW cbq-2.0 has a bug in this
487		   place; apparently they forgot to shift it by cl->ewma_log.
488 */
489 if (cl->avgidle < 0)
490 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491 if (cl->avgidle < cl->minidle)
492 cl->avgidle = cl->minidle;
493 if (delay <= 0)
494 delay = 1;
495 PSCHED_TADD2(q->now, delay, cl->undertime);
496
497 cl->xstats.overactions++;
498 cl->delayed = 1;
499 }
500 if (q->wd_expires == 0 || q->wd_expires > delay)
501 q->wd_expires = delay;
502
503 /* Dirty work! We must schedule wakeups based on
504 real available rate, rather than leaf rate,
505 which may be tiny (even zero).
506 */
507 if (q->toplevel == TC_CBQ_MAXLEVEL) {
508 struct cbq_class *b;
509 psched_tdiff_t base_delay = q->wd_expires;
510
511 for (b = cl->borrow; b; b = b->borrow) {
512 delay = PSCHED_TDIFF(b->undertime, q->now);
513 if (delay < base_delay) {
514 if (delay <= 0)
515 delay = 1;
516 base_delay = delay;
517 }
518 }
519
520 q->wd_expires = base_delay;
521 }
522}
523
524/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
525   they go overlimit
526 */
527
528static void cbq_ovl_rclassic(struct cbq_class *cl)
529{
530 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531 struct cbq_class *this = cl;
532
533 do {
534 if (cl->level > q->toplevel) {
535 cl = NULL;
536 break;
537 }
538 } while ((cl = cl->borrow) != NULL);
539
540 if (cl == NULL)
541 cl = this;
542 cbq_ovl_classic(cl);
543}
544
545/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
546
547static void cbq_ovl_delay(struct cbq_class *cl)
548{
549 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551
552 if (!cl->delayed) {
553 unsigned long sched = jiffies;
554
555 delay += cl->offtime;
556 if (cl->avgidle < 0)
557 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
558 if (cl->avgidle < cl->minidle)
559 cl->avgidle = cl->minidle;
560 PSCHED_TADD2(q->now, delay, cl->undertime);
561
562 if (delay > 0) {
563 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
564 cl->penalized = sched;
565 cl->cpriority = TC_CBQ_MAXPRIO;
566 q->pmask |= (1<<TC_CBQ_MAXPRIO);
567 if (del_timer(&q->delay_timer) &&
568 (long)(q->delay_timer.expires - sched) > 0)
569 q->delay_timer.expires = sched;
570 add_timer(&q->delay_timer);
571 cl->delayed = 1;
572 cl->xstats.overactions++;
573 return;
574 }
575 delay = 1;
576 }
577 if (q->wd_expires == 0 || q->wd_expires > delay)
578 q->wd_expires = delay;
579}
580
581/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
582
583static void cbq_ovl_lowprio(struct cbq_class *cl)
584{
585 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
586
587 cl->penalized = jiffies + cl->penalty;
588
589 if (cl->cpriority != cl->priority2) {
590 cl->cpriority = cl->priority2;
591 q->pmask |= (1<<cl->cpriority);
592 cl->xstats.overactions++;
593 }
594 cbq_ovl_classic(cl);
595}
596
597/* TC_CBQ_OVL_DROP: penalize class by dropping */
598
599static void cbq_ovl_drop(struct cbq_class *cl)
600{
601 if (cl->q->ops->drop)
602 if (cl->q->ops->drop(cl->q))
603 cl->qdisc->q.qlen--;
604 cl->xstats.overactions++;
605 cbq_ovl_classic(cl);
606}
607
608static void cbq_watchdog(unsigned long arg)
609{
610 struct Qdisc *sch = (struct Qdisc*)arg;
611
612 sch->flags &= ~TCQ_F_THROTTLED;
613 netif_schedule(sch->dev);
614}
615
616static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
617{
618 struct cbq_class *cl;
619 struct cbq_class *cl_prev = q->active[prio];
620 unsigned long now = jiffies;
621 unsigned long sched = now;
622
623 if (cl_prev == NULL)
624 return now;
625
626 do {
627 cl = cl_prev->next_alive;
628 if ((long)(now - cl->penalized) > 0) {
629 cl_prev->next_alive = cl->next_alive;
630 cl->next_alive = NULL;
631 cl->cpriority = cl->priority;
632 cl->delayed = 0;
633 cbq_activate_class(cl);
634
635 if (cl == q->active[prio]) {
636 q->active[prio] = cl_prev;
637 if (cl == q->active[prio]) {
638 q->active[prio] = NULL;
639 return 0;
640 }
641 }
642
643 cl = cl_prev->next_alive;
644 } else if ((long)(sched - cl->penalized) > 0)
645 sched = cl->penalized;
646 } while ((cl_prev = cl) != q->active[prio]);
647
648 return (long)(sched - now);
649}
650
651static void cbq_undelay(unsigned long arg)
652{
653 struct Qdisc *sch = (struct Qdisc*)arg;
654 struct cbq_sched_data *q = qdisc_priv(sch);
655 long delay = 0;
656 unsigned pmask;
657
658 pmask = q->pmask;
659 q->pmask = 0;
660
661 while (pmask) {
662 int prio = ffz(~pmask);
663 long tmp;
664
665 pmask &= ~(1<<prio);
666
667 tmp = cbq_undelay_prio(q, prio);
668 if (tmp > 0) {
669 q->pmask |= 1<<prio;
670 if (tmp < delay || delay == 0)
671 delay = tmp;
672 }
673 }
674
675 if (delay) {
676 q->delay_timer.expires = jiffies + delay;
677 add_timer(&q->delay_timer);
678 }
679
680 sch->flags &= ~TCQ_F_THROTTLED;
681 netif_schedule(sch->dev);
682}
683
684
685#ifdef CONFIG_NET_CLS_POLICE
686
687static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
688{
689 int len = skb->len;
690 struct Qdisc *sch = child->__parent;
691 struct cbq_sched_data *q = qdisc_priv(sch);
692 struct cbq_class *cl = q->rx_class;
693
694 q->rx_class = NULL;
695
696 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
697
698 cbq_mark_toplevel(q, cl);
699
700 q->rx_class = cl;
701 cl->q->__parent = sch;
702
703 if (cl->q->enqueue(skb, cl->q) == 0) {
704 sch->q.qlen++;
705 sch->bstats.packets++;
706 sch->bstats.bytes+=len;
707 if (!cl->next_alive)
708 cbq_activate_class(cl);
709 return 0;
710 }
711 sch->qstats.drops++;
712 return 0;
713 }
714
715 sch->qstats.drops++;
716 return -1;
717}
718#endif
719
720/*
721	It is a mission-critical procedure.
722
723 We "regenerate" toplevel cutoff, if transmitting class
724 has backlog and it is not regulated. It is not part of
725 original CBQ description, but looks more reasonable.
726 Probably, it is wrong. This question needs further investigation.
727*/
728
729static __inline__ void
730cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
731 struct cbq_class *borrowed)
732{
733 if (cl && q->toplevel >= borrowed->level) {
734 if (cl->q->q.qlen > 1) {
735 do {
736 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
737 q->toplevel = borrowed->level;
738 return;
739 }
740 } while ((borrowed=borrowed->borrow) != NULL);
741 }
742#if 0
743	/* It is not necessary now. Uncommenting it
744 will save CPU cycles, but decrease fairness.
745 */
746 q->toplevel = TC_CBQ_MAXLEVEL;
747#endif
748 }
749}
750
751static void
752cbq_update(struct cbq_sched_data *q)
753{
754 struct cbq_class *this = q->tx_class;
755 struct cbq_class *cl = this;
756 int len = q->tx_len;
757
758 q->tx_class = NULL;
759
760 for ( ; cl; cl = cl->share) {
761 long avgidle = cl->avgidle;
762 long idle;
763
764 cl->bstats.packets++;
765 cl->bstats.bytes += len;
766
767 /*
768 (now - last) is total time between packet right edges.
769 (last_pktlen/rate) is "virtual" busy time, so that
770
771		   idle = (now - last) - last_pktlen/rate
772		 */
773
774 idle = PSCHED_TDIFF(q->now, cl->last);
775 if ((unsigned long)idle > 128*1024*1024) {
776 avgidle = cl->maxidle;
777 } else {
778 idle -= L2T(cl, len);
779
780 /* true_avgidle := (1-W)*true_avgidle + W*idle,
781 where W=2^{-ewma_log}. But cl->avgidle is scaled:
782 cl->avgidle == true_avgidle/W,
783 hence:
784 */
785 avgidle += idle - (avgidle>>cl->ewma_log);
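			/* Sanity check of the scaling with made-up numbers
			 * (not from any trace): ewma_log = 5 gives W = 1/32.
			 * If cl->avgidle = 3200 (true_avgidle = 100) and
			 * idle = 64, the line above yields
			 * 3200 + 64 - (3200 >> 5) = 3164 = 32 * 98.875,
			 * and indeed (31/32)*100 + (1/32)*64 = 98.875.
			 */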
786 }
787
788 if (avgidle <= 0) {
789 /* Overlimit or at-limit */
790
791 if (avgidle < cl->minidle)
792 avgidle = cl->minidle;
793
794 cl->avgidle = avgidle;
795
796 /* Calculate expected time, when this class
797 will be allowed to send.
798 It will occur, when:
799 (1-W)*true_avgidle + W*delay = 0, i.e.
800 idle = (1/W - 1)*(-true_avgidle)
801 or
802 idle = (1 - W)*(-cl->avgidle);
803 */
804 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
805
806 /*
807 That is not all.
808 To maintain the rate allocated to the class,
809			   we add to undertime the virtual clock time
810			   necessary to complete the transmitted packet.
811			   (len/phys_bandwidth has already elapsed by
812			   the time cbq_update is called.)
813 */
814
815 idle -= L2T(&q->link, len);
816 idle += L2T(cl, len);
817
818 PSCHED_AUDIT_TDIFF(idle);
819
820 PSCHED_TADD2(q->now, idle, cl->undertime);
821 } else {
822 /* Underlimit */
823
824 PSCHED_SET_PASTPERFECT(cl->undertime);
825 if (avgidle > cl->maxidle)
826 cl->avgidle = cl->maxidle;
827 else
828 cl->avgidle = avgidle;
829 }
830 cl->last = q->now;
831 }
832
833 cbq_update_toplevel(q, this, q->tx_borrowed);
834}
835
836static __inline__ struct cbq_class *
837cbq_under_limit(struct cbq_class *cl)
838{
839 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
840 struct cbq_class *this_cl = cl;
841
842 if (cl->tparent == NULL)
843 return cl;
844
845 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
846 !PSCHED_TLESS(q->now, cl->undertime)) {
847 cl->delayed = 0;
848 return cl;
849 }
850
851 do {
852		/* This is a very suspicious place. Currently the overlimit
853		   action is generated for non-bounded classes
854		   only if the link is completely congested.
855		   Though this agrees with the ancestor-only paradigm,
856		   it looks very stupid. Particularly,
857		   it means that this chunk of code will either
858		   never be called or result in strong amplification
859		   of burstiness. Dangerous, silly, and yet
860		   no other solution exists.
861 */
862 if ((cl = cl->borrow) == NULL) {
863 this_cl->qstats.overlimits++;
864 this_cl->overlimit(this_cl);
865 return NULL;
866 }
867 if (cl->level > q->toplevel)
868 return NULL;
869 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
870 PSCHED_TLESS(q->now, cl->undertime));
871
872 cl->delayed = 0;
873 return cl;
874}
875
876static __inline__ struct sk_buff *
877cbq_dequeue_prio(struct Qdisc *sch, int prio)
878{
879 struct cbq_sched_data *q = qdisc_priv(sch);
880 struct cbq_class *cl_tail, *cl_prev, *cl;
881 struct sk_buff *skb;
882 int deficit;
883
884 cl_tail = cl_prev = q->active[prio];
885 cl = cl_prev->next_alive;
886
887 do {
888 deficit = 0;
889
890 /* Start round */
891 do {
892 struct cbq_class *borrow = cl;
893
894 if (cl->q->q.qlen &&
895 (borrow = cbq_under_limit(cl)) == NULL)
896 goto skip_class;
897
898 if (cl->deficit <= 0) {
899 /* Class exhausted its allotment per
900 this round. Switch to the next one.
901 */
902 deficit = 1;
903 cl->deficit += cl->quantum;
904 goto next_class;
905 }
906
907 skb = cl->q->dequeue(cl->q);
908
909 /* Class did not give us any skb :-(
910			   It could occur even if cl->q->q.qlen != 0
911			   f.e. if cl->q == "tbf"
912 */
913 if (skb == NULL)
914 goto skip_class;
915
916 cl->deficit -= skb->len;
917 q->tx_class = cl;
918 q->tx_borrowed = borrow;
919 if (borrow != cl) {
920#ifndef CBQ_XSTATS_BORROWS_BYTES
921 borrow->xstats.borrows++;
922 cl->xstats.borrows++;
923#else
924 borrow->xstats.borrows += skb->len;
925 cl->xstats.borrows += skb->len;
926#endif
927 }
928 q->tx_len = skb->len;
929
930 if (cl->deficit <= 0) {
931 q->active[prio] = cl;
932 cl = cl->next_alive;
933 cl->deficit += cl->quantum;
934 }
935 return skb;
936
937skip_class:
938 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
939 /* Class is empty or penalized.
940 Unlink it from active chain.
941 */
942 cl_prev->next_alive = cl->next_alive;
943 cl->next_alive = NULL;
944
945 /* Did cl_tail point to it? */
946 if (cl == cl_tail) {
947 /* Repair it! */
948 cl_tail = cl_prev;
949
950 /* Was it the last class in this band? */
951 if (cl == cl_tail) {
952 /* Kill the band! */
953 q->active[prio] = NULL;
954 q->activemask &= ~(1<<prio);
955 if (cl->q->q.qlen)
956 cbq_activate_class(cl);
957 return NULL;
958 }
959
960 q->active[prio] = cl_tail;
961 }
962 if (cl->q->q.qlen)
963 cbq_activate_class(cl);
964
965 cl = cl_prev;
966 }
967
968next_class:
969 cl_prev = cl;
970 cl = cl->next_alive;
971 } while (cl_prev != cl_tail);
972 } while (deficit);
973
974 q->active[prio] = cl_prev;
975
976 return NULL;
977}
978
979static __inline__ struct sk_buff *
980cbq_dequeue_1(struct Qdisc *sch)
981{
982 struct cbq_sched_data *q = qdisc_priv(sch);
983 struct sk_buff *skb;
984 unsigned activemask;
985
986 activemask = q->activemask&0xFF;
987 while (activemask) {
988 int prio = ffz(~activemask);
989 activemask &= ~(1<<prio);
990 skb = cbq_dequeue_prio(sch, prio);
991 if (skb)
992 return skb;
993 }
994 return NULL;
995}
996
997static struct sk_buff *
998cbq_dequeue(struct Qdisc *sch)
999{
1000 struct sk_buff *skb;
1001 struct cbq_sched_data *q = qdisc_priv(sch);
1002 psched_time_t now;
1003 psched_tdiff_t incr;
1004
1005 PSCHED_GET_TIME(now);
1006 incr = PSCHED_TDIFF(now, q->now_rt);
1007
1008 if (q->tx_class) {
1009 psched_tdiff_t incr2;
1010 /* Time integrator. We calculate EOS time
1011 by adding expected packet transmission time.
1012 If real time is greater, we warp artificial clock,
1013 so that:
1014
1015 cbq_time = max(real_time, work);
1016 */
1017 incr2 = L2T(&q->link, q->tx_len);
1018 PSCHED_TADD(q->now, incr2);
1019 cbq_update(q);
1020 if ((incr -= incr2) < 0)
1021 incr = 0;
1022 }
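	/* Illustrative numbers (ours, not measured): if the last packet was
	 * tx_len = 1500 bytes on a 1.25e6 byte/s link, incr2 is ~1.2 ms of
	 * "work". If only 0.8 ms of real time elapsed, incr was clamped to 0
	 * above and the PSCHED_TADD() below adds nothing extra, so q->now
	 * advances by the work estimate alone; if 2 ms elapsed, q->now
	 * advances by the full 2 ms, i.e. cbq_time = max(real_time, work).
	 */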
1023 PSCHED_TADD(q->now, incr);
1024 q->now_rt = now;
1025
1026 for (;;) {
1027 q->wd_expires = 0;
1028
1029 skb = cbq_dequeue_1(sch);
1030 if (skb) {
1031 sch->q.qlen--;
1032 sch->flags &= ~TCQ_F_THROTTLED;
1033 return skb;
1034 }
1035
1036 /* All the classes are overlimit.
1037
1038 It is possible, if:
1039
1040 1. Scheduler is empty.
1041 2. Toplevel cutoff inhibited borrowing.
1042 3. Root class is overlimit.
1043
1044	   Reset the 2nd and 3rd conditions and retry.
1045
1046	   Note that NS and cbq-2.0 are buggy: peeking
1047	   an arbitrary class is appropriate for ancestor-only
1048	   sharing, but not for the toplevel algorithm.
1049
1050	   Our version is better but slower, because it requires
1051	   two passes; this is unavoidable with top-level sharing.
1052 */
1053
1054 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1055 PSCHED_IS_PASTPERFECT(q->link.undertime))
1056 break;
1057
1058 q->toplevel = TC_CBQ_MAXLEVEL;
1059 PSCHED_SET_PASTPERFECT(q->link.undertime);
1060 }
1061
1062 /* No packets in scheduler or nobody wants to give them to us :-(
1063 Sigh... start watchdog timer in the last case. */
1064
1065 if (sch->q.qlen) {
1066 sch->qstats.overlimits++;
1067 if (q->wd_expires) {
1068 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1069 if (delay <= 0)
1070 delay = 1;
1071 mod_timer(&q->wd_timer, jiffies + delay);
1072 sch->flags |= TCQ_F_THROTTLED;
1073 }
1074 }
1075 return NULL;
1076}
1077
1078/* CBQ class maintenance routines */
1079
1080static void cbq_adjust_levels(struct cbq_class *this)
1081{
1082 if (this == NULL)
1083 return;
1084
1085 do {
1086 int level = 0;
1087 struct cbq_class *cl;
1088
1089 if ((cl = this->children) != NULL) {
1090 do {
1091 if (cl->level > level)
1092 level = cl->level;
1093 } while ((cl = cl->sibling) != this->children);
1094 }
1095 this->level = level+1;
1096 } while ((this = this->tparent) != NULL);
1097}
1098
1099static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1100{
1101 struct cbq_class *cl;
1102 unsigned h;
1103
1104 if (q->quanta[prio] == 0)
1105 return;
1106
1107 for (h=0; h<16; h++) {
1108 for (cl = q->classes[h]; cl; cl = cl->next) {
1109			/* BUGGGG... Beware! This expression suffers from
1110			   arithmetic overflows!
1111 */
1112 if (cl->priority == prio) {
1113 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1114 q->quanta[prio];
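			/* The overflow warned about above is easy to hit with
			 * plausible (made-up) numbers: weight defaults to the
			 * class rate in bytes/sec, so a 100 Mbit/s class has
			 * weight ~= 12500000; with allot = 1514 the product
			 * weight*allot alone is ~1.9e10, well past 32 bits.
			 */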
1115 }
1116 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1117 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1118 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1119 }
1120 }
1121 }
1122}
1123
1124static void cbq_sync_defmap(struct cbq_class *cl)
1125{
1126 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1127 struct cbq_class *split = cl->split;
1128 unsigned h;
1129 int i;
1130
1131 if (split == NULL)
1132 return;
1133
1134 for (i=0; i<=TC_PRIO_MAX; i++) {
1135 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1136 split->defaults[i] = NULL;
1137 }
1138
1139 for (i=0; i<=TC_PRIO_MAX; i++) {
1140 int level = split->level;
1141
1142 if (split->defaults[i])
1143 continue;
1144
1145 for (h=0; h<16; h++) {
1146 struct cbq_class *c;
1147
1148 for (c = q->classes[h]; c; c = c->next) {
1149 if (c->split == split && c->level < level &&
1150 c->defmap&(1<<i)) {
1151 split->defaults[i] = c;
1152 level = c->level;
1153 }
1154 }
1155 }
1156 }
1157}
1158
1159static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1160{
1161 struct cbq_class *split = NULL;
1162
1163 if (splitid == 0) {
1164 if ((split = cl->split) == NULL)
1165 return;
1166 splitid = split->classid;
1167 }
1168
1169 if (split == NULL || split->classid != splitid) {
1170 for (split = cl->tparent; split; split = split->tparent)
1171 if (split->classid == splitid)
1172 break;
1173 }
1174
1175 if (split == NULL)
1176 return;
1177
1178 if (cl->split != split) {
1179 cl->defmap = 0;
1180 cbq_sync_defmap(cl);
1181 cl->split = split;
1182 cl->defmap = def&mask;
1183 } else
1184 cl->defmap = (cl->defmap&~mask)|(def&mask);
1185
1186 cbq_sync_defmap(cl);
1187}
1188
1189static void cbq_unlink_class(struct cbq_class *this)
1190{
1191 struct cbq_class *cl, **clp;
1192 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1193
1194 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1195 if (cl == this) {
1196 *clp = cl->next;
1197 cl->next = NULL;
1198 break;
1199 }
1200 }
1201
1202 if (this->tparent) {
1203 clp=&this->sibling;
1204 cl = *clp;
1205 do {
1206 if (cl == this) {
1207 *clp = cl->sibling;
1208 break;
1209 }
1210 clp = &cl->sibling;
1211 } while ((cl = *clp) != this->sibling);
1212
1213 if (this->tparent->children == this) {
1214 this->tparent->children = this->sibling;
1215 if (this->sibling == this)
1216 this->tparent->children = NULL;
1217 }
1218 } else {
1219 BUG_TRAP(this->sibling == this);
1220 }
1221}
1222
1223static void cbq_link_class(struct cbq_class *this)
1224{
1225 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1226 unsigned h = cbq_hash(this->classid);
1227 struct cbq_class *parent = this->tparent;
1228
1229 this->sibling = this;
1230 this->next = q->classes[h];
1231 q->classes[h] = this;
1232
1233 if (parent == NULL)
1234 return;
1235
1236 if (parent->children == NULL) {
1237 parent->children = this;
1238 } else {
1239 this->sibling = parent->children->sibling;
1240 parent->children->sibling = this;
1241 }
1242}
1243
1244static unsigned int cbq_drop(struct Qdisc* sch)
1245{
1246 struct cbq_sched_data *q = qdisc_priv(sch);
1247 struct cbq_class *cl, *cl_head;
1248 int prio;
1249 unsigned int len;
1250
1251 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1252 if ((cl_head = q->active[prio]) == NULL)
1253 continue;
1254
1255 cl = cl_head;
1256 do {
1257 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1258 sch->q.qlen--;
1259				if (!cl->q->q.qlen)
1260					cbq_deactivate_class(cl);
1261				return len;
1262 }
1263 } while ((cl = cl->next_alive) != cl_head);
1264 }
1265 return 0;
1266}
1267
1268static void
1269cbq_reset(struct Qdisc* sch)
1270{
1271 struct cbq_sched_data *q = qdisc_priv(sch);
1272 struct cbq_class *cl;
1273 int prio;
1274 unsigned h;
1275
1276 q->activemask = 0;
1277 q->pmask = 0;
1278 q->tx_class = NULL;
1279 q->tx_borrowed = NULL;
1280 del_timer(&q->wd_timer);
1281 del_timer(&q->delay_timer);
1282 q->toplevel = TC_CBQ_MAXLEVEL;
1283 PSCHED_GET_TIME(q->now);
1284 q->now_rt = q->now;
1285
1286 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1287 q->active[prio] = NULL;
1288
1289 for (h = 0; h < 16; h++) {
1290 for (cl = q->classes[h]; cl; cl = cl->next) {
1291 qdisc_reset(cl->q);
1292
1293 cl->next_alive = NULL;
1294 PSCHED_SET_PASTPERFECT(cl->undertime);
1295 cl->avgidle = cl->maxidle;
1296 cl->deficit = cl->quantum;
1297 cl->cpriority = cl->priority;
1298 }
1299 }
1300 sch->q.qlen = 0;
1301}
1302
1303
1304static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305{
1306 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1307 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1308 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309 }
1310 if (lss->change&TCF_CBQ_LSS_EWMA)
1311 cl->ewma_log = lss->ewma_log;
1312 if (lss->change&TCF_CBQ_LSS_AVPKT)
1313 cl->avpkt = lss->avpkt;
1314 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1315 cl->minidle = -(long)lss->minidle;
1316 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1317 cl->maxidle = lss->maxidle;
1318 cl->avgidle = lss->maxidle;
1319 }
1320 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1321 cl->offtime = lss->offtime;
1322 return 0;
1323}
1324
1325static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326{
1327 q->nclasses[cl->priority]--;
1328 q->quanta[cl->priority] -= cl->weight;
1329 cbq_normalize_quanta(q, cl->priority);
1330}
1331
1332static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333{
1334 q->nclasses[cl->priority]++;
1335 q->quanta[cl->priority] += cl->weight;
1336 cbq_normalize_quanta(q, cl->priority);
1337}
1338
1339static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340{
1341 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1342
1343 if (wrr->allot)
1344 cl->allot = wrr->allot;
1345 if (wrr->weight)
1346 cl->weight = wrr->weight;
1347 if (wrr->priority) {
1348 cl->priority = wrr->priority-1;
1349 cl->cpriority = cl->priority;
1350 if (cl->priority >= cl->priority2)
1351 cl->priority2 = TC_CBQ_MAXPRIO-1;
1352 }
1353
1354 cbq_addprio(q, cl);
1355 return 0;
1356}
1357
1358static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359{
1360 switch (ovl->strategy) {
1361 case TC_CBQ_OVL_CLASSIC:
1362 cl->overlimit = cbq_ovl_classic;
1363 break;
1364 case TC_CBQ_OVL_DELAY:
1365 cl->overlimit = cbq_ovl_delay;
1366 break;
1367 case TC_CBQ_OVL_LOWPRIO:
1368 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1369 ovl->priority2-1 <= cl->priority)
1370 return -EINVAL;
1371 cl->priority2 = ovl->priority2-1;
1372 cl->overlimit = cbq_ovl_lowprio;
1373 break;
1374 case TC_CBQ_OVL_DROP:
1375 cl->overlimit = cbq_ovl_drop;
1376 break;
1377 case TC_CBQ_OVL_RCLASSIC:
1378 cl->overlimit = cbq_ovl_rclassic;
1379 break;
1380 default:
1381 return -EINVAL;
1382 }
1383 cl->penalty = (ovl->penalty*HZ)/1000;
1384 return 0;
1385}
1386
1387#ifdef CONFIG_NET_CLS_POLICE
1388static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389{
1390 cl->police = p->police;
1391
1392 if (cl->q->handle) {
1393 if (p->police == TC_POLICE_RECLASSIFY)
1394 cl->q->reshape_fail = cbq_reshape_fail;
1395 else
1396 cl->q->reshape_fail = NULL;
1397 }
1398 return 0;
1399}
1400#endif
1401
1402static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403{
1404 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1405 return 0;
1406}
1407
1408static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409{
1410 struct cbq_sched_data *q = qdisc_priv(sch);
1411 struct rtattr *tb[TCA_CBQ_MAX];
1412 struct tc_ratespec *r;
1413
1414 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1415 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1416 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1417 return -EINVAL;
1418
1419 if (tb[TCA_CBQ_LSSOPT-1] &&
1420 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1421 return -EINVAL;
1422
1423 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424
1425 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1426 return -EINVAL;
1427
1428 q->link.refcnt = 1;
1429 q->link.sibling = &q->link;
1430 q->link.classid = sch->handle;
1431 q->link.qdisc = sch;
1432	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1433					    sch->handle)))
1434		q->link.q = &noop_qdisc;
1435
1436 q->link.priority = TC_CBQ_MAXPRIO-1;
1437 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440 q->link.overlimit = cbq_ovl_classic;
1441 q->link.allot = psched_mtu(sch->dev);
1442 q->link.quantum = q->link.allot;
1443 q->link.weight = q->link.R_tab->rate.rate;
1444
1445 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446 q->link.avpkt = q->link.allot/2;
1447 q->link.minidle = -0x7FFFFFFF;
1448 q->link.stats_lock = &sch->dev->queue_lock;
1449
1450 init_timer(&q->wd_timer);
1451 q->wd_timer.data = (unsigned long)sch;
1452 q->wd_timer.function = cbq_watchdog;
1453 init_timer(&q->delay_timer);
1454 q->delay_timer.data = (unsigned long)sch;
1455 q->delay_timer.function = cbq_undelay;
1456 q->toplevel = TC_CBQ_MAXLEVEL;
1457 PSCHED_GET_TIME(q->now);
1458 q->now_rt = q->now;
1459
1460 cbq_link_class(&q->link);
1461
1462 if (tb[TCA_CBQ_LSSOPT-1])
1463 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465 cbq_addprio(q, &q->link);
1466 return 0;
1467}
1468
1469static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470{
1471 unsigned char *b = skb->tail;
1472
1473 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474 return skb->len;
1475
1476rtattr_failure:
1477 skb_trim(skb, b - skb->data);
1478 return -1;
1479}
1480
1481static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482{
1483 unsigned char *b = skb->tail;
1484 struct tc_cbq_lssopt opt;
1485
1486 opt.flags = 0;
1487 if (cl->borrow == NULL)
1488 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489 if (cl->share == NULL)
1490 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491 opt.ewma_log = cl->ewma_log;
1492 opt.level = cl->level;
1493 opt.avpkt = cl->avpkt;
1494 opt.maxidle = cl->maxidle;
1495 opt.minidle = (u32)(-cl->minidle);
1496 opt.offtime = cl->offtime;
1497 opt.change = ~0;
1498 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499 return skb->len;
1500
1501rtattr_failure:
1502 skb_trim(skb, b - skb->data);
1503 return -1;
1504}
1505
1506static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507{
1508 unsigned char *b = skb->tail;
1509 struct tc_cbq_wrropt opt;
1510
1511 opt.flags = 0;
1512 opt.allot = cl->allot;
1513 opt.priority = cl->priority+1;
1514 opt.cpriority = cl->cpriority+1;
1515 opt.weight = cl->weight;
1516 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517 return skb->len;
1518
1519rtattr_failure:
1520 skb_trim(skb, b - skb->data);
1521 return -1;
1522}
1523
1524static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525{
1526 unsigned char *b = skb->tail;
1527 struct tc_cbq_ovl opt;
1528
1529 opt.strategy = cl->ovl_strategy;
1530 opt.priority2 = cl->priority2+1;
1531	opt.pad = 0;
1532	opt.penalty = (cl->penalty*1000)/HZ;
1533 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1534 return skb->len;
1535
1536rtattr_failure:
1537 skb_trim(skb, b - skb->data);
1538 return -1;
1539}
1540
1541static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542{
1543 unsigned char *b = skb->tail;
1544 struct tc_cbq_fopt opt;
1545
1546 if (cl->split || cl->defmap) {
1547 opt.split = cl->split ? cl->split->classid : 0;
1548 opt.defmap = cl->defmap;
1549 opt.defchange = ~0;
1550 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1551 }
1552 return skb->len;
1553
1554rtattr_failure:
1555 skb_trim(skb, b - skb->data);
1556 return -1;
1557}
1558
1559#ifdef CONFIG_NET_CLS_POLICE
1560static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561{
1562 unsigned char *b = skb->tail;
1563 struct tc_cbq_police opt;
1564
1565 if (cl->police) {
1566 opt.police = cl->police;
1567		opt.__res1 = 0;
1568		opt.__res2 = 0;
1569		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1570 }
1571 return skb->len;
1572
1573rtattr_failure:
1574 skb_trim(skb, b - skb->data);
1575 return -1;
1576}
1577#endif
1578
1579static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580{
1581 if (cbq_dump_lss(skb, cl) < 0 ||
1582 cbq_dump_rate(skb, cl) < 0 ||
1583 cbq_dump_wrr(skb, cl) < 0 ||
1584 cbq_dump_ovl(skb, cl) < 0 ||
1585#ifdef CONFIG_NET_CLS_POLICE
1586 cbq_dump_police(skb, cl) < 0 ||
1587#endif
1588 cbq_dump_fopt(skb, cl) < 0)
1589 return -1;
1590 return 0;
1591}
1592
1593static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594{
1595 struct cbq_sched_data *q = qdisc_priv(sch);
1596 unsigned char *b = skb->tail;
1597 struct rtattr *rta;
1598
1599 rta = (struct rtattr*)b;
1600 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601 if (cbq_dump_attr(skb, &q->link) < 0)
1602 goto rtattr_failure;
1603 rta->rta_len = skb->tail - b;
1604 return skb->len;
1605
1606rtattr_failure:
1607 skb_trim(skb, b - skb->data);
1608 return -1;
1609}
1610
1611static int
1612cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613{
1614 struct cbq_sched_data *q = qdisc_priv(sch);
1615
1616 q->link.xstats.avgidle = q->link.avgidle;
1617 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1618}
1619
1620static int
1621cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1622 struct sk_buff *skb, struct tcmsg *tcm)
1623{
1624 struct cbq_class *cl = (struct cbq_class*)arg;
1625 unsigned char *b = skb->tail;
1626 struct rtattr *rta;
1627
1628 if (cl->tparent)
1629 tcm->tcm_parent = cl->tparent->classid;
1630 else
1631 tcm->tcm_parent = TC_H_ROOT;
1632 tcm->tcm_handle = cl->classid;
1633 tcm->tcm_info = cl->q->handle;
1634
1635 rta = (struct rtattr*)b;
1636 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1637 if (cbq_dump_attr(skb, cl) < 0)
1638 goto rtattr_failure;
1639 rta->rta_len = skb->tail - b;
1640 return skb->len;
1641
1642rtattr_failure:
1643 skb_trim(skb, b - skb->data);
1644 return -1;
1645}
1646
1647static int
1648cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1649 struct gnet_dump *d)
1650{
1651 struct cbq_sched_data *q = qdisc_priv(sch);
1652 struct cbq_class *cl = (struct cbq_class*)arg;
1653
1654 cl->qstats.qlen = cl->q->q.qlen;
1655 cl->xstats.avgidle = cl->avgidle;
1656 cl->xstats.undertime = 0;
1657
1658 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1659 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660
1661 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1662#ifdef CONFIG_NET_ESTIMATOR
1663 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1664#endif
1665 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1666 return -1;
1667
1668 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1669}
1670
1671static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1672 struct Qdisc **old)
1673{
1674 struct cbq_class *cl = (struct cbq_class*)arg;
1675
1676 if (cl) {
1677 if (new == NULL) {
1678			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1679						     cl->classid)) == NULL)
1680				return -ENOBUFS;
1681 } else {
1682#ifdef CONFIG_NET_CLS_POLICE
1683 if (cl->police == TC_POLICE_RECLASSIFY)
1684 new->reshape_fail = cbq_reshape_fail;
1685#endif
1686 }
1687 sch_tree_lock(sch);
1688		*old = xchg(&cl->q, new);
1689		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1690		qdisc_reset(*old);
1691 sch_tree_unlock(sch);
1692
1693 return 0;
1694 }
1695 return -ENOENT;
1696}
1697
1698static struct Qdisc *
1699cbq_leaf(struct Qdisc *sch, unsigned long arg)
1700{
1701 struct cbq_class *cl = (struct cbq_class*)arg;
1702
1703 return cl ? cl->q : NULL;
1704}
1705
1706static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1707{
1708 struct cbq_class *cl = (struct cbq_class *)arg;
1709
1710 if (cl->q->q.qlen == 0)
1711 cbq_deactivate_class(cl);
1712}
1713
1714static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1715{
1716 struct cbq_sched_data *q = qdisc_priv(sch);
1717 struct cbq_class *cl = cbq_class_lookup(q, classid);
1718
1719 if (cl) {
1720 cl->refcnt++;
1721 return (unsigned long)cl;
1722 }
1723 return 0;
1724}
1725
1726static void cbq_destroy_filters(struct cbq_class *cl)
1727{
1728 struct tcf_proto *tp;
1729
1730 while ((tp = cl->filter_list) != NULL) {
1731 cl->filter_list = tp->next;
1732 tcf_destroy(tp);
1733 }
1734}
1735
1736static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1737{
1738 struct cbq_sched_data *q = qdisc_priv(sch);
1739
1740 BUG_TRAP(!cl->filters);
1741
1742 cbq_destroy_filters(cl);
1743 qdisc_destroy(cl->q);
1744 qdisc_put_rtab(cl->R_tab);
1745#ifdef CONFIG_NET_ESTIMATOR
1746 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1747#endif
1748 if (cl != &q->link)
1749 kfree(cl);
1750}
1751
1752static void
1753cbq_destroy(struct Qdisc* sch)
1754{
1755 struct cbq_sched_data *q = qdisc_priv(sch);
1756 struct cbq_class *cl;
1757 unsigned h;
1758
1759#ifdef CONFIG_NET_CLS_POLICE
1760 q->rx_class = NULL;
1761#endif
1762 /*
1763 * Filters must be destroyed first because we don't destroy the
1764 * classes from root to leafs which means that filters can still
1765 * be bound to classes which have been destroyed already. --TGR '04
1766 */
1767 for (h = 0; h < 16; h++)
1768 for (cl = q->classes[h]; cl; cl = cl->next)
1769 cbq_destroy_filters(cl);
1770
1771 for (h = 0; h < 16; h++) {
1772 struct cbq_class *next;
1773
1774 for (cl = q->classes[h]; cl; cl = next) {
1775 next = cl->next;
1776 cbq_destroy_class(sch, cl);
1777 }
1778 }
1779}
1780
1781static void cbq_put(struct Qdisc *sch, unsigned long arg)
1782{
1783 struct cbq_class *cl = (struct cbq_class*)arg;
1784
1785 if (--cl->refcnt == 0) {
1786#ifdef CONFIG_NET_CLS_POLICE
1787 struct cbq_sched_data *q = qdisc_priv(sch);
1788
1789 spin_lock_bh(&sch->dev->queue_lock);
1790 if (q->rx_class == cl)
1791 q->rx_class = NULL;
1792 spin_unlock_bh(&sch->dev->queue_lock);
1793#endif
1794
1795 cbq_destroy_class(sch, cl);
1796 }
1797}
1798
1799static int
1800cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1801 unsigned long *arg)
1802{
1803 int err;
1804 struct cbq_sched_data *q = qdisc_priv(sch);
1805 struct cbq_class *cl = (struct cbq_class*)*arg;
1806 struct rtattr *opt = tca[TCA_OPTIONS-1];
1807 struct rtattr *tb[TCA_CBQ_MAX];
1808 struct cbq_class *parent;
1809 struct qdisc_rate_table *rtab = NULL;
1810
1811 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1812 return -EINVAL;
1813
1814 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1815 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1816 return -EINVAL;
1817
1818 if (tb[TCA_CBQ_FOPT-1] &&
1819 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1820 return -EINVAL;
1821
1822 if (tb[TCA_CBQ_RATE-1] &&
1823 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1824 return -EINVAL;
1825
1826 if (tb[TCA_CBQ_LSSOPT-1] &&
1827 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1828 return -EINVAL;
1829
1830 if (tb[TCA_CBQ_WRROPT-1] &&
1831 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1832 return -EINVAL;
1833
1834#ifdef CONFIG_NET_CLS_POLICE
1835 if (tb[TCA_CBQ_POLICE-1] &&
1836 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1837 return -EINVAL;
1838#endif
1839
1840 if (cl) {
1841 /* Check parent */
1842 if (parentid) {
1843 if (cl->tparent && cl->tparent->classid != parentid)
1844 return -EINVAL;
1845 if (!cl->tparent && parentid != TC_H_ROOT)
1846 return -EINVAL;
1847 }
1848
1849 if (tb[TCA_CBQ_RATE-1]) {
1850 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1851 if (rtab == NULL)
1852 return -EINVAL;
1853 }
1854
1855 /* Change class parameters */
1856 sch_tree_lock(sch);
1857
1858 if (cl->next_alive != NULL)
1859 cbq_deactivate_class(cl);
1860
1861 if (rtab) {
1862 rtab = xchg(&cl->R_tab, rtab);
1863 qdisc_put_rtab(rtab);
1864 }
1865
1866 if (tb[TCA_CBQ_LSSOPT-1])
1867 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1868
1869 if (tb[TCA_CBQ_WRROPT-1]) {
1870 cbq_rmprio(q, cl);
1871 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1872 }
1873
1874 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1875 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1876
1877#ifdef CONFIG_NET_CLS_POLICE
1878 if (tb[TCA_CBQ_POLICE-1])
1879 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1880#endif
1881
1882 if (tb[TCA_CBQ_FOPT-1])
1883 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1884
1885 if (cl->q->q.qlen)
1886 cbq_activate_class(cl);
1887
1888 sch_tree_unlock(sch);
1889
1890#ifdef CONFIG_NET_ESTIMATOR
1891 if (tca[TCA_RATE-1])
1892 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1893 cl->stats_lock, tca[TCA_RATE-1]);
1894#endif
1895 return 0;
1896 }
1897
1898 if (parentid == TC_H_ROOT)
1899 return -EINVAL;
1900
1901 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1902 tb[TCA_CBQ_LSSOPT-1] == NULL)
1903 return -EINVAL;
1904
1905 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1906 if (rtab == NULL)
1907 return -EINVAL;
1908
1909 if (classid) {
1910 err = -EINVAL;
1911 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1912 goto failure;
1913 } else {
1914 int i;
1915 classid = TC_H_MAKE(sch->handle,0x8000);
1916
1917 for (i=0; i<0x8000; i++) {
1918 if (++q->hgenerator >= 0x8000)
1919 q->hgenerator = 1;
1920 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1921 break;
1922 }
1923 err = -ENOSR;
1924 if (i >= 0x8000)
1925 goto failure;
1926 classid = classid|q->hgenerator;
1927 }
1928
1929 parent = &q->link;
1930 if (parentid) {
1931 parent = cbq_class_lookup(q, parentid);
1932 err = -EINVAL;
1933 if (parent == NULL)
1934 goto failure;
1935 }
1936
1937 err = -ENOBUFS;
1938	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1939	if (cl == NULL)
1940 goto failure;
1941	cl->R_tab = rtab;
1942 rtab = NULL;
1943 cl->refcnt = 1;
1944	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1945		cl->q = &noop_qdisc;
1946 cl->classid = classid;
1947 cl->tparent = parent;
1948 cl->qdisc = sch;
1949 cl->allot = parent->allot;
1950 cl->quantum = cl->allot;
1951 cl->weight = cl->R_tab->rate.rate;
1952 cl->stats_lock = &sch->dev->queue_lock;
1953
1954 sch_tree_lock(sch);
1955 cbq_link_class(cl);
1956 cl->borrow = cl->tparent;
1957 if (cl->tparent != &q->link)
1958 cl->share = cl->tparent;
1959 cbq_adjust_levels(parent);
1960 cl->minidle = -0x7FFFFFFF;
1961 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1962 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1963 if (cl->ewma_log==0)
1964 cl->ewma_log = q->link.ewma_log;
1965 if (cl->maxidle==0)
1966 cl->maxidle = q->link.maxidle;
1967 if (cl->avpkt==0)
1968 cl->avpkt = q->link.avpkt;
1969 cl->overlimit = cbq_ovl_classic;
1970 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1971 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1972#ifdef CONFIG_NET_CLS_POLICE
1973 if (tb[TCA_CBQ_POLICE-1])
1974 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1975#endif
1976 if (tb[TCA_CBQ_FOPT-1])
1977 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1978 sch_tree_unlock(sch);
1979
1980#ifdef CONFIG_NET_ESTIMATOR
1981 if (tca[TCA_RATE-1])
1982 gen_new_estimator(&cl->bstats, &cl->rate_est,
1983 cl->stats_lock, tca[TCA_RATE-1]);
1984#endif
1985
1986 *arg = (unsigned long)cl;
1987 return 0;
1988
1989failure:
1990 qdisc_put_rtab(rtab);
1991 return err;
1992}
1993
1994static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1995{
1996 struct cbq_sched_data *q = qdisc_priv(sch);
1997 struct cbq_class *cl = (struct cbq_class*)arg;
1998	unsigned int qlen;
1999
2000 if (cl->filters || cl->children || cl == &q->link)
2001 return -EBUSY;
2002
2003 sch_tree_lock(sch);
2004
2005	qlen = cl->q->q.qlen;
2006 qdisc_reset(cl->q);
2007 qdisc_tree_decrease_qlen(cl->q, qlen);
2008
2009	if (cl->next_alive)
2010 cbq_deactivate_class(cl);
2011
2012 if (q->tx_borrowed == cl)
2013 q->tx_borrowed = q->tx_class;
2014 if (q->tx_class == cl) {
2015 q->tx_class = NULL;
2016 q->tx_borrowed = NULL;
2017 }
2018#ifdef CONFIG_NET_CLS_POLICE
2019 if (q->rx_class == cl)
2020 q->rx_class = NULL;
2021#endif
2022
2023 cbq_unlink_class(cl);
2024 cbq_adjust_levels(cl->tparent);
2025 cl->defmap = 0;
2026 cbq_sync_defmap(cl);
2027
2028 cbq_rmprio(q, cl);
2029 sch_tree_unlock(sch);
2030
2031 if (--cl->refcnt == 0)
2032 cbq_destroy_class(sch, cl);
2033
2034 return 0;
2035}
2036
2037static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2038{
2039 struct cbq_sched_data *q = qdisc_priv(sch);
2040 struct cbq_class *cl = (struct cbq_class *)arg;
2041
2042 if (cl == NULL)
2043 cl = &q->link;
2044
2045 return &cl->filter_list;
2046}
2047
2048static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2049 u32 classid)
2050{
2051 struct cbq_sched_data *q = qdisc_priv(sch);
2052 struct cbq_class *p = (struct cbq_class*)parent;
2053 struct cbq_class *cl = cbq_class_lookup(q, classid);
2054
2055 if (cl) {
2056 if (p && p->level <= cl->level)
2057 return 0;
2058 cl->filters++;
2059 return (unsigned long)cl;
2060 }
2061 return 0;
2062}
2063
2064static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2065{
2066 struct cbq_class *cl = (struct cbq_class*)arg;
2067
2068 cl->filters--;
2069}
2070
2071static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2072{
2073 struct cbq_sched_data *q = qdisc_priv(sch);
2074 unsigned h;
2075
2076 if (arg->stop)
2077 return;
2078
2079 for (h = 0; h < 16; h++) {
2080 struct cbq_class *cl;
2081
2082 for (cl = q->classes[h]; cl; cl = cl->next) {
2083 if (arg->count < arg->skip) {
2084 arg->count++;
2085 continue;
2086 }
2087 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2088 arg->stop = 1;
2089 return;
2090 }
2091 arg->count++;
2092 }
2093 }
2094}
2095
2096static struct Qdisc_class_ops cbq_class_ops = {
2097 .graft = cbq_graft,
2098 .leaf = cbq_leaf,
2099	.qlen_notify	=	cbq_qlen_notify,
2100	.get		=	cbq_get,
2101 .put = cbq_put,
2102 .change = cbq_change_class,
2103 .delete = cbq_delete,
2104 .walk = cbq_walk,
2105 .tcf_chain = cbq_find_tcf,
2106 .bind_tcf = cbq_bind_filter,
2107 .unbind_tcf = cbq_unbind_filter,
2108 .dump = cbq_dump_class,
2109 .dump_stats = cbq_dump_class_stats,
2110};
2111
2112static struct Qdisc_ops cbq_qdisc_ops = {
2113 .next = NULL,
2114 .cl_ops = &cbq_class_ops,
2115 .id = "cbq",
2116 .priv_size = sizeof(struct cbq_sched_data),
2117 .enqueue = cbq_enqueue,
2118 .dequeue = cbq_dequeue,
2119 .requeue = cbq_requeue,
2120 .drop = cbq_drop,
2121 .init = cbq_init,
2122 .reset = cbq_reset,
2123 .destroy = cbq_destroy,
2124 .change = NULL,
2125 .dump = cbq_dump,
2126 .dump_stats = cbq_dump_stats,
2127 .owner = THIS_MODULE,
2128};
2129
2130static int __init cbq_module_init(void)
2131{
2132 return register_qdisc(&cbq_qdisc_ops);
2133}
2134static void __exit cbq_module_exit(void)
2135{
2136 unregister_qdisc(&cbq_qdisc_ops);
2137}
2138module_init(cbq_module_init)
2139module_exit(cbq_module_exit)
2140MODULE_LICENSE("GPL");