1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in.h>
24#include <linux/errno.h>
25#include <linux/interrupt.h>
26#include <linux/if_ether.h>
27#include <linux/inet.h>
28#include <linux/netdevice.h>
29#include <linux/etherdevice.h>
30#include <linux/notifier.h>
31#include <net/ip.h>
32#include <net/route.h>
33#include <linux/skbuff.h>
34#include <net/sock.h>
35#include <net/pkt_sched.h>
36
37
38/* Class-Based Queueing (CBQ) algorithm.
39 =======================================
40
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42	     Management Models for Packet Networks",
43	     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
44
45	 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
46
47	 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
48	     Parameters", 1996
49
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
52
53 -----------------------------------------------------------------------
54
 55	The algorithm skeleton was taken from the NS simulator cbq.cc.
 56	If someone wants to check this code against the LBL version,
 57	they should take into account that ONLY the skeleton was borrowed;
 58	the implementation is different. In particular:
59
60 --- The WRR algorithm is different. Our version looks more
61	    reasonable (I hope) and works when quanta are allowed to be
62	    less than MTU, which is always the case when real-time classes
63	    have small rates. Note that the statement in [3] is
64	    incomplete: delay may actually be estimated even if the class
65	    per-round allotment is less than MTU. Namely, if the per-round
66	    allotment is W*r_i, and r_1+...+r_k = r < 1
67
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
69
70 In the worst case we have IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial.
72
73
 74	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
 75	    interpret some places that look like wrong translations
 76	    from NS. Anyone is advised to find these differences
 77	    and explain to me why I am wrong 8).
78
 79	--- Linux has no EOI event, so we cannot estimate true class
 80	    idle time. The workaround is to consider the next dequeue event
 81	    as a sign that the previous packet has finished. This is wrong because of
 82	    internal device queueing, but on a permanently loaded link it holds.
 83	    Moreover, combined with the clock integrator, this scheme looks
 84	    very close to an ideal solution. */
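/* Worked instance of the delay bound above, with assumed numbers
   (purely illustrative, not taken from [1]-[4]): MTU = 1500 bytes,
   two classes (k = 2) with shares r_1 = 0.1 and r_2 = 0.4, so
   r = 0.5, and W = 10000 bytes, giving a per-round allotment
   W*r_1 = 1000 < MTU for class 1. Reading the bracket as a ceiling,
   [MTU/(W*r_1)] = 2 and

	delay_1 <= (2*10000*0.5 + 10000*0.5 + 2*1500)/B = 18000/B

   i.e. about 14.4 ms on a 10 Mbit/s link. In the IntServ form quoted
   above this corresponds to D = W*r + k*MTU = 8000 bytes and
   C = MTU*r = 750 bytes. */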
85
86struct cbq_sched_data;
87
88
89struct cbq_class
90{
91 struct cbq_class *next; /* hash table link */
92 struct cbq_class *next_alive; /* next class with backlog in this priority band */
93
94/* Parameters */
95 u32 classid;
96 unsigned char priority; /* class priority */
97 unsigned char priority2; /* priority to be used after overlimit */
98 unsigned char ewma_log; /* time constant for idle time calculation */
99 unsigned char ovl_strategy;
100#ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police;
102#endif
103
104 u32 defmap;
105
106 /* Link-sharing scheduler parameters */
107 long maxidle; /* Class parameters: see below. */
108 long offtime;
109 long minidle;
110 u32 avpkt;
111 struct qdisc_rate_table *R_tab;
112
113 /* Overlimit strategy parameters */
114 void (*overlimit)(struct cbq_class *cl);
115 long penalty;
116
117 /* General scheduler (WRR) parameters */
118 long allot;
119 long quantum; /* Allotment per WRR round */
120 long weight; /* Relative allotment: see below */
121
122 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
123 struct cbq_class *split; /* Ptr to split node */
124 struct cbq_class *share; /* Ptr to LS parent in the class tree */
125 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
126 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
127 parent otherwise */
128 struct cbq_class *sibling; /* Sibling chain */
129 struct cbq_class *children; /* Pointer to children chain */
130
131 struct Qdisc *q; /* Elementary queueing discipline */
132
133
134/* Variables */
135 unsigned char cpriority; /* Effective priority */
136 unsigned char delayed;
137 unsigned char level; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
140 */
141
142 psched_time_t last; /* Last end of service */
143 psched_time_t undertime;
144 long avgidle;
145 long deficit; /* Saved deficit for WRR */
146 unsigned long penalized;
147 struct gnet_stats_basic bstats;
148 struct gnet_stats_queue qstats;
149 struct gnet_stats_rate_est rate_est;
150 spinlock_t *stats_lock;
151 struct tc_cbq_xstats xstats;
152
153 struct tcf_proto *filter_list;
154
155 int refcnt;
156 int filters;
157
158 struct cbq_class *defaults[TC_PRIO_MAX+1];
159};
160
161struct cbq_sched_data
162{
163 struct cbq_class *classes[16]; /* Hash table of all classes */
164 int nclasses[TC_CBQ_MAXPRIO+1];
165 unsigned quanta[TC_CBQ_MAXPRIO+1];
166
167 struct cbq_class link;
168
169 unsigned activemask;
170 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
171 with backlog */
172
173#ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class *rx_class;
175#endif
176 struct cbq_class *tx_class;
177 struct cbq_class *tx_borrowed;
178 int tx_len;
179 psched_time_t now; /* Cached timestamp */
180 psched_time_t now_rt; /* Cached real time */
181 unsigned pmask;
182
183 struct timer_list delay_timer;
184	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
185						   started when CBQ has
186 backlog, but cannot
187 transmit just now */
188	psched_tdiff_t		wd_expires;
189	int			toplevel;
190 u32 hgenerator;
191};
192
193
194#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
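/* L2T ("length to time") is only a table lookup: the rate table
   attached to the class holds, for each packet-size cell, the time
   needed to send a packet of that size at the class rate, and
   rate.cell_log selects the cell width. Illustration with assumed
   values: if rate.cell_log == 3 (8-byte cells), a 1000-byte packet
   uses R_tab->data[1000 >> 3] == data[125], which userspace tc has
   pre-filled with the corresponding transmission time in scheduler
   ticks. */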
195
196
197static __inline__ unsigned cbq_hash(u32 h)
198{
199 h ^= h>>8;
200 h ^= h>>4;
201 return h&0xF;
202}
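/* Example (illustrative): classid 1:2 is the u32 0x00010002;
   0x00010002 ^ (0x00010002>>8) == 0x00010102, then
   0x00010102 ^ (0x00010102>>4) == 0x00011112, and &0xF gives
   bucket 2 of the 16-entry classes[] hash table. */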
203
204static __inline__ struct cbq_class *
205cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206{
207 struct cbq_class *cl;
208
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
211 return cl;
212 return NULL;
213}
214
215#ifdef CONFIG_NET_CLS_POLICE
216
217static struct cbq_class *
218cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219{
220 struct cbq_class *cl, *new;
221
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224 return new;
225
226 return NULL;
227}
228
229#endif
230
231/* Classify packet. The procedure is pretty complicated, but
232 it allows us to combine link sharing and priority scheduling
233 transparently.
234
235	   Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
236 so that it resolves to split nodes. Then packets are classified
237 by logical priority, or a more specific classifier may be attached
238 to the split node.
239 */
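/* A hedged sketch of the intent (hypothetical configuration):
   suppose the root acts as a split node and a child class C was
   created with a defmap bit set for TC_PRIO_BESTEFFORT. Then
   cbq_sync_defmap() records C in the split node's defaults[] slot,
   and a packet whose classifier result carries only a minor number
   (a logical priority) falls through to C via the defmap lookup in
   cbq_classify() below. */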
240
241static struct cbq_class *
242cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243{
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
250
251 /*
252 * Step 1. If skb->priority points to one of our classes, use it.
253 */
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
256 return cl;
257
258	*qerr = NET_XMIT_BYPASS;
259	for (;;) {
260 int result = 0;
261 defmap = head->defaults;
262
263 /*
264 * Step 2+n. Apply classifier.
265 */
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267 goto fallback;
268
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
274
275 if (cl == NULL || cl->level >= head->level)
276 goto fallback;
277 }
278
279#ifdef CONFIG_NET_CLS_ACT
280 switch (result) {
281 case TC_ACT_QUEUED:
282		case TC_ACT_STOLEN:
283			*qerr = NET_XMIT_SUCCESS;
284 case TC_ACT_SHOT:
285 return NULL;
286 }
287#elif defined(CONFIG_NET_CLS_POLICE)
288 switch (result) {
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
291 case TC_POLICE_SHOT:
292 return NULL;
293 default:
294 break;
295 }
296#endif
297 if (cl->level == 0)
298 return cl;
299
300 /*
301 * Step 3+n. If classifier selected a link sharing class,
302		 * apply the agency-specific classifier.
303		 * Repeat this procedure until we hit a leaf node.
304 */
305 head = cl;
306 }
307
308fallback:
309 cl = head;
310
311 /*
312 * Step 4. No success...
313 */
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317 return head;
318
319 return cl;
320}
321
322/*
323 A packet has just been enqueued on the empty class.
324 cbq_activate_class adds it to the tail of active class list
325 of its priority band.
326 */
327
328static __inline__ void cbq_activate_class(struct cbq_class *cl)
329{
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
333
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
336
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
340 } else {
341 cl->next_alive = cl;
342 q->activemask |= (1<<prio);
343 }
344}
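/* The active list is a circular, singly linked ring threaded through
   next_alive; q->active[prio] always points at the tail, so
   tail->next_alive is the head where dequeueing starts.
   Illustration: with A and B already active (active[prio] == B,
   B->next_alive == A, A->next_alive == B), activating C yields
   active[prio] == C, B->next_alive == C and C->next_alive == A,
   i.e. the ring A -> B -> C -> A with C as the new tail. */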
345
346/*
347 Unlink class from active chain.
348 Note that this same procedure is done directly in cbq_dequeue*
349 during round-robin procedure.
350 */
351
352static void cbq_deactivate_class(struct cbq_class *this)
353{
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
358
359 do {
360 cl = cl_prev->next_alive;
361 if (cl == this) {
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
364
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
370 return;
371 }
372 }
373			return;
374 }
375 } while ((cl_prev = cl) != q->active[prio]);
376}
377
378static void
379cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
380{
381 int toplevel = q->toplevel;
382
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384 psched_time_t now;
385 psched_tdiff_t incr;
386
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
390
391 do {
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
394 return;
395 }
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
397 }
398}
399
400static int
401cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
402{
403 struct cbq_sched_data *q = qdisc_priv(sch);
404 int len = skb->len;
405 int ret;
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
407
408#ifdef CONFIG_NET_CLS_POLICE
409 q->rx_class = cl;
410#endif
411 if (cl == NULL) {
412		if (ret == NET_XMIT_BYPASS)
413			sch->qstats.drops++;
414 kfree_skb(skb);
415 return ret;
416 }
417
418#ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
420#endif
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422 sch->q.qlen++;
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
426 if (!cl->next_alive)
427 cbq_activate_class(cl);
428 return ret;
429 }
430
431 sch->qstats.drops++;
432 cbq_mark_toplevel(q, cl);
433 cl->qstats.drops++;
434 return ret;
435}
436
437static int
438cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
439{
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
442 int ret;
443
444 if ((cl = q->tx_class) == NULL) {
445 kfree_skb(skb);
446 sch->qstats.drops++;
447 return NET_XMIT_CN;
448 }
449 q->tx_class = NULL;
450
451 cbq_mark_toplevel(q, cl);
452
453#ifdef CONFIG_NET_CLS_POLICE
454 q->rx_class = cl;
455 cl->q->__parent = sch;
456#endif
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458 sch->q.qlen++;
459 sch->qstats.requeues++;
460 if (!cl->next_alive)
461 cbq_activate_class(cl);
462 return 0;
463 }
464 sch->qstats.drops++;
465 cl->qstats.drops++;
466 return ret;
467}
468
469/* Overlimit actions */
470
471/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
472
473static void cbq_ovl_classic(struct cbq_class *cl)
474{
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
477
478 if (!cl->delayed) {
479 delay += cl->offtime;
480
481		/*
482		   The class goes to sleep, so that it will have no
483		   chance to work on avgidle. Let's forgive it 8)
484
485		   BTW cbq-2.0 has a bug in this
486		   place: apparently they forgot to shift it by cl->ewma_log.
487		 */
488 if (cl->avgidle < 0)
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
492 if (delay <= 0)
493 delay = 1;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
495
496 cl->xstats.overactions++;
497 cl->delayed = 1;
498 }
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
501
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
505 */
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
507 struct cbq_class *b;
508 psched_tdiff_t base_delay = q->wd_expires;
509
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
513 if (delay <= 0)
514 delay = 1;
515 base_delay = delay;
516 }
517 }
518
519 q->wd_expires = base_delay;
520 }
521}
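/* Worked example of cbq_ovl_classic() above, with assumed values:
   undertime - now = 2000, offtime = 10000, avgidle = -4000,
   ewma_log = 3. Then delay = 2000 + 10000 - (4000 - (4000>>3))
   = 12000 - 3500 = 8500, and undertime becomes now + 8500.
   The subtracted term is (1 - 2^-ewma_log)*(-avgidle), the shift
   the comment notes cbq-2.0 forgot. */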
522
523/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
524 they go overlimit
525 */
526
527static void cbq_ovl_rclassic(struct cbq_class *cl)
528{
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
531
532 do {
533 if (cl->level > q->toplevel) {
534 cl = NULL;
535 break;
536 }
537 } while ((cl = cl->borrow) != NULL);
538
539 if (cl == NULL)
540 cl = this;
541 cbq_ovl_classic(cl);
542}
543
544/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
545
546static void cbq_ovl_delay(struct cbq_class *cl)
547{
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
550
551 if (!cl->delayed) {
552 unsigned long sched = jiffies;
553
554 delay += cl->offtime;
555 if (cl->avgidle < 0)
556 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557 if (cl->avgidle < cl->minidle)
558 cl->avgidle = cl->minidle;
559 PSCHED_TADD2(q->now, delay, cl->undertime);
560
561 if (delay > 0) {
562 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563 cl->penalized = sched;
564 cl->cpriority = TC_CBQ_MAXPRIO;
565 q->pmask |= (1<<TC_CBQ_MAXPRIO);
566 if (del_timer(&q->delay_timer) &&
567 (long)(q->delay_timer.expires - sched) > 0)
568 q->delay_timer.expires = sched;
569 add_timer(&q->delay_timer);
570 cl->delayed = 1;
571 cl->xstats.overactions++;
572 return;
573 }
574 delay = 1;
575 }
576 if (q->wd_expires == 0 || q->wd_expires > delay)
577 q->wd_expires = delay;
578}
579
580/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
581
582static void cbq_ovl_lowprio(struct cbq_class *cl)
583{
584 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
585
586 cl->penalized = jiffies + cl->penalty;
587
588 if (cl->cpriority != cl->priority2) {
589 cl->cpriority = cl->priority2;
590 q->pmask |= (1<<cl->cpriority);
591 cl->xstats.overactions++;
592 }
593 cbq_ovl_classic(cl);
594}
595
596/* TC_CBQ_OVL_DROP: penalize class by dropping */
597
598static void cbq_ovl_drop(struct cbq_class *cl)
599{
600 if (cl->q->ops->drop)
601 if (cl->q->ops->drop(cl->q))
602 cl->qdisc->q.qlen--;
603 cl->xstats.overactions++;
604 cbq_ovl_classic(cl);
605}
606
607static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608{
609 struct cbq_class *cl;
610 struct cbq_class *cl_prev = q->active[prio];
611 unsigned long now = jiffies;
612 unsigned long sched = now;
613
614 if (cl_prev == NULL)
615		return 0;
616
617 do {
618 cl = cl_prev->next_alive;
619 if ((long)(now - cl->penalized) > 0) {
620 cl_prev->next_alive = cl->next_alive;
621 cl->next_alive = NULL;
622 cl->cpriority = cl->priority;
623 cl->delayed = 0;
624 cbq_activate_class(cl);
625
626 if (cl == q->active[prio]) {
627 q->active[prio] = cl_prev;
628 if (cl == q->active[prio]) {
629 q->active[prio] = NULL;
630 return 0;
631 }
632 }
633
634 cl = cl_prev->next_alive;
635 } else if ((long)(sched - cl->penalized) > 0)
636 sched = cl->penalized;
637 } while ((cl_prev = cl) != q->active[prio]);
638
639 return (long)(sched - now);
640}
641
642static void cbq_undelay(unsigned long arg)
643{
644 struct Qdisc *sch = (struct Qdisc*)arg;
645 struct cbq_sched_data *q = qdisc_priv(sch);
646 long delay = 0;
647 unsigned pmask;
648
649 pmask = q->pmask;
650 q->pmask = 0;
651
652 while (pmask) {
653 int prio = ffz(~pmask);
654 long tmp;
655
656 pmask &= ~(1<<prio);
657
658 tmp = cbq_undelay_prio(q, prio);
659 if (tmp > 0) {
660 q->pmask |= 1<<prio;
661 if (tmp < delay || delay == 0)
662 delay = tmp;
663 }
664 }
665
666 if (delay) {
667 q->delay_timer.expires = jiffies + delay;
668 add_timer(&q->delay_timer);
669 }
670
671 sch->flags &= ~TCQ_F_THROTTLED;
672 netif_schedule(sch->dev);
673}
674
675
676#ifdef CONFIG_NET_CLS_POLICE
677
678static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
679{
680 int len = skb->len;
681 struct Qdisc *sch = child->__parent;
682 struct cbq_sched_data *q = qdisc_priv(sch);
683 struct cbq_class *cl = q->rx_class;
684
685 q->rx_class = NULL;
686
687 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688
689 cbq_mark_toplevel(q, cl);
690
691 q->rx_class = cl;
692 cl->q->__parent = sch;
693
694 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->q.qlen++;
696 sch->bstats.packets++;
697 sch->bstats.bytes+=len;
698 if (!cl->next_alive)
699 cbq_activate_class(cl);
700 return 0;
701 }
702 sch->qstats.drops++;
703 return 0;
704 }
705
706 sch->qstats.drops++;
707 return -1;
708}
709#endif
710
711/*
712	This is a mission-critical procedure.
713
714	We "regenerate" the toplevel cutoff if the transmitting class
715	has backlog and is not regulated. This is not part of the
716	original CBQ description, but it looks more reasonable.
717	Probably it is wrong; this question needs further investigation.
718*/
719
720static __inline__ void
721cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722 struct cbq_class *borrowed)
723{
724 if (cl && q->toplevel >= borrowed->level) {
725 if (cl->q->q.qlen > 1) {
726 do {
727 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
728 q->toplevel = borrowed->level;
729 return;
730 }
731 } while ((borrowed=borrowed->borrow) != NULL);
732 }
733#if 0
734	/* It is not necessary now. Uncommenting it
735 will save CPU cycles, but decrease fairness.
736 */
737 q->toplevel = TC_CBQ_MAXLEVEL;
738#endif
739 }
740}
741
742static void
743cbq_update(struct cbq_sched_data *q)
744{
745 struct cbq_class *this = q->tx_class;
746 struct cbq_class *cl = this;
747 int len = q->tx_len;
748
749 q->tx_class = NULL;
750
751 for ( ; cl; cl = cl->share) {
752 long avgidle = cl->avgidle;
753 long idle;
754
755 cl->bstats.packets++;
756 cl->bstats.bytes += len;
757
758 /*
759 (now - last) is total time between packet right edges.
760 (last_pktlen/rate) is "virtual" busy time, so that
761
762		   idle = (now - last) - last_pktlen/rate
763		*/
764
765 idle = PSCHED_TDIFF(q->now, cl->last);
766 if ((unsigned long)idle > 128*1024*1024) {
767 avgidle = cl->maxidle;
768 } else {
769 idle -= L2T(cl, len);
770
771 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772 where W=2^{-ewma_log}. But cl->avgidle is scaled:
773 cl->avgidle == true_avgidle/W,
774 hence:
775 */
776 avgidle += idle - (avgidle>>cl->ewma_log);
777 }
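			/* Numerical check of the scaled update above
			   (assumed values): ewma_log = 3, so W = 1/8.
			   If cl->avgidle == 800 (true_avgidle == 100) and
			   the new sample is idle == 40, then
			   avgidle = 800 + 40 - (800>>3) = 740, i.e.
			   true_avgidle = 740/8 = 92.5
			   = (7/8)*100 + (1/8)*40, as required. */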
778
779 if (avgidle <= 0) {
780 /* Overlimit or at-limit */
781
782 if (avgidle < cl->minidle)
783 avgidle = cl->minidle;
784
785 cl->avgidle = avgidle;
786
787			/* Calculate the expected time when this class
788			   will be allowed to send.
789			   It will occur when:
790 (1-W)*true_avgidle + W*delay = 0, i.e.
791 idle = (1/W - 1)*(-true_avgidle)
792 or
793 idle = (1 - W)*(-cl->avgidle);
794 */
795 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
796
797 /*
798 That is not all.
799 To maintain the rate allocated to the class,
800			   we add to undertime the virtual clock time
801			   necessary to complete the transmitted packet.
802			   (len/phys_bandwidth has already passed
803			   by the moment of cbq_update.)
804 */
805
806 idle -= L2T(&q->link, len);
807 idle += L2T(cl, len);
808
809 PSCHED_AUDIT_TDIFF(idle);
810
811 PSCHED_TADD2(q->now, idle, cl->undertime);
812 } else {
813 /* Underlimit */
814
815 PSCHED_SET_PASTPERFECT(cl->undertime);
816 if (avgidle > cl->maxidle)
817 cl->avgidle = cl->maxidle;
818 else
819 cl->avgidle = avgidle;
820 }
821 cl->last = q->now;
822 }
823
824 cbq_update_toplevel(q, this, q->tx_borrowed);
825}
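/* Illustration of cbq_update()'s overlimit branch above (assumed
   values): avgidle = -800, ewma_log = 3, so idle = 800 - (800>>3)
   = 700; if the link needs 50 ticks for the packet but the class
   rate would need 400, idle becomes 700 - 50 + 400 = 1050 and
   cl->undertime is set to q->now + 1050, roughly the earliest time
   the class may send again without borrowing. */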
826
827static __inline__ struct cbq_class *
828cbq_under_limit(struct cbq_class *cl)
829{
830 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
831 struct cbq_class *this_cl = cl;
832
833 if (cl->tparent == NULL)
834 return cl;
835
836 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
837 !PSCHED_TLESS(q->now, cl->undertime)) {
838 cl->delayed = 0;
839 return cl;
840 }
841
842 do {
843		/* This is a very suspicious place. Currently the overlimit
844		   action is generated for non-bounded classes
845		   only if the link is completely congested.
846		   Though this agrees with the ancestor-only paradigm,
847		   it looks very stupid. In particular,
848		   it means that this chunk of code will either
849		   never be called or result in strong amplification
850		   of burstiness. Dangerous, silly, and yet
851		   no other solution exists.
852		 */
853 if ((cl = cl->borrow) == NULL) {
854 this_cl->qstats.overlimits++;
855 this_cl->overlimit(this_cl);
856 return NULL;
857 }
858 if (cl->level > q->toplevel)
859 return NULL;
860 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
861 PSCHED_TLESS(q->now, cl->undertime));
862
863 cl->delayed = 0;
864 return cl;
865}
866
867static __inline__ struct sk_buff *
868cbq_dequeue_prio(struct Qdisc *sch, int prio)
869{
870 struct cbq_sched_data *q = qdisc_priv(sch);
871 struct cbq_class *cl_tail, *cl_prev, *cl;
872 struct sk_buff *skb;
873 int deficit;
874
875 cl_tail = cl_prev = q->active[prio];
876 cl = cl_prev->next_alive;
877
878 do {
879 deficit = 0;
880
881 /* Start round */
882 do {
883 struct cbq_class *borrow = cl;
884
885 if (cl->q->q.qlen &&
886 (borrow = cbq_under_limit(cl)) == NULL)
887 goto skip_class;
888
889 if (cl->deficit <= 0) {
890 /* Class exhausted its allotment per
891 this round. Switch to the next one.
892 */
893 deficit = 1;
894 cl->deficit += cl->quantum;
895 goto next_class;
896 }
897
898 skb = cl->q->dequeue(cl->q);
899
900 /* Class did not give us any skb :-(
901			   It can occur even if cl->q->q.qlen != 0,
902			   e.g. if cl->q == "tbf"
903 */
904 if (skb == NULL)
905 goto skip_class;
906
907 cl->deficit -= skb->len;
908 q->tx_class = cl;
909 q->tx_borrowed = borrow;
910 if (borrow != cl) {
911#ifndef CBQ_XSTATS_BORROWS_BYTES
912 borrow->xstats.borrows++;
913 cl->xstats.borrows++;
914#else
915 borrow->xstats.borrows += skb->len;
916 cl->xstats.borrows += skb->len;
917#endif
918 }
919 q->tx_len = skb->len;
920
921 if (cl->deficit <= 0) {
922 q->active[prio] = cl;
923 cl = cl->next_alive;
924 cl->deficit += cl->quantum;
925 }
926 return skb;
927
928skip_class:
929 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
930 /* Class is empty or penalized.
931 Unlink it from active chain.
932 */
933 cl_prev->next_alive = cl->next_alive;
934 cl->next_alive = NULL;
935
936 /* Did cl_tail point to it? */
937 if (cl == cl_tail) {
938 /* Repair it! */
939 cl_tail = cl_prev;
940
941 /* Was it the last class in this band? */
942 if (cl == cl_tail) {
943 /* Kill the band! */
944 q->active[prio] = NULL;
945 q->activemask &= ~(1<<prio);
946 if (cl->q->q.qlen)
947 cbq_activate_class(cl);
948 return NULL;
949 }
950
951 q->active[prio] = cl_tail;
952 }
953 if (cl->q->q.qlen)
954 cbq_activate_class(cl);
955
956 cl = cl_prev;
957 }
958
959next_class:
960 cl_prev = cl;
961 cl = cl->next_alive;
962 } while (cl_prev != cl_tail);
963 } while (deficit);
964
965 q->active[prio] = cl_prev;
966
967 return NULL;
968}
969
970static __inline__ struct sk_buff *
971cbq_dequeue_1(struct Qdisc *sch)
972{
973 struct cbq_sched_data *q = qdisc_priv(sch);
974 struct sk_buff *skb;
975 unsigned activemask;
976
977 activemask = q->activemask&0xFF;
978 while (activemask) {
979 int prio = ffz(~activemask);
980 activemask &= ~(1<<prio);
981 skb = cbq_dequeue_prio(sch, prio);
982 if (skb)
983 return skb;
984 }
985 return NULL;
986}
987
988static struct sk_buff *
989cbq_dequeue(struct Qdisc *sch)
990{
991 struct sk_buff *skb;
992 struct cbq_sched_data *q = qdisc_priv(sch);
993 psched_time_t now;
994 psched_tdiff_t incr;
995
996 PSCHED_GET_TIME(now);
997 incr = PSCHED_TDIFF(now, q->now_rt);
998
999 if (q->tx_class) {
1000 psched_tdiff_t incr2;
1001		/* Time integrator. We calculate the EOS time
1002		   by adding the expected packet transmission time.
1003		   If real time is greater, we warp the artificial clock,
1004		   so that:
1005
1006 cbq_time = max(real_time, work);
1007 */
1008 incr2 = L2T(&q->link, q->tx_len);
1009 PSCHED_TADD(q->now, incr2);
1010 cbq_update(q);
1011 if ((incr -= incr2) < 0)
1012 incr = 0;
1013 }
1014 PSCHED_TADD(q->now, incr);
1015 q->now_rt = now;
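	/* Net effect of the integrator above: q->now advances by
	   max(real elapsed time, L2T(&q->link, q->tx_len)).
	   Illustration (assumed values): if 300 ticks of real time
	   passed but the ideal transmission time of the last packet
	   was 500 ticks, the artificial clock advances by 500; had
	   700 ticks passed, it would advance by 700, i.e.
	   cbq_time = max(real_time, work). */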
1016
1017 for (;;) {
1018 q->wd_expires = 0;
1019
1020 skb = cbq_dequeue_1(sch);
1021 if (skb) {
1022 sch->q.qlen--;
1023 sch->flags &= ~TCQ_F_THROTTLED;
1024 return skb;
1025 }
1026
1027		/* All the classes are overlimit.
1028
1029		   This is possible if:
1030
1031		   1. The scheduler is empty.
1032		   2. The toplevel cutoff inhibited borrowing.
1033		   3. The root class is overlimit.
1034
1035		   Reset the 2nd and 3rd conditions and retry.
1036
1037		   Note that NS and cbq-2.0 are buggy: picking
1038		   an arbitrary class is appropriate for ancestor-only
1039		   sharing, but not for the toplevel algorithm.
1040
1041		   Our version is better but slower, because it requires
1042		   two passes; that is unavoidable with top-level sharing.
1043		*/
1044
1045 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1046 PSCHED_IS_PASTPERFECT(q->link.undertime))
1047 break;
1048
1049 q->toplevel = TC_CBQ_MAXLEVEL;
1050 PSCHED_SET_PASTPERFECT(q->link.undertime);
1051 }
1052
1053	/* No packets in the scheduler, or nobody wants to give them to us :-(
1054	   Sigh... start the watchdog timer in the latter case. */
1055
1056 if (sch->q.qlen) {
1057 sch->qstats.overlimits++;
1058		if (q->wd_expires)
1059 qdisc_watchdog_schedule(&q->watchdog,
1060 q->now + q->wd_expires);
1061	}
1062 return NULL;
1063}
1064
1065/* CBQ class maintenance routines */
1066
1067static void cbq_adjust_levels(struct cbq_class *this)
1068{
1069 if (this == NULL)
1070 return;
1071
1072 do {
1073 int level = 0;
1074 struct cbq_class *cl;
1075
1076 if ((cl = this->children) != NULL) {
1077 do {
1078 if (cl->level > level)
1079 level = cl->level;
1080 } while ((cl = cl->sibling) != this->children);
1081 }
1082 this->level = level+1;
1083 } while ((this = this->tparent) != NULL);
1084}
1085
1086static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1087{
1088 struct cbq_class *cl;
1089 unsigned h;
1090
1091 if (q->quanta[prio] == 0)
1092 return;
1093
1094 for (h=0; h<16; h++) {
1095 for (cl = q->classes[h]; cl; cl = cl->next) {
1096			/* BUGGGG... Beware! This expression suffers from
1097 arithmetic overflows!
1098 */
1099 if (cl->priority == prio) {
1100 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1101 q->quanta[prio];
1102 }
1103 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1104 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1105 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1106 }
1107 }
1108 }
1109}
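/* Worked example of cbq_normalize_quanta() above (assumed values):
   a priority band with two classes, weights 2 and 6 (so
   q->quanta[prio] == 8, q->nclasses[prio] == 2), both with
   allot == 1500. The first gets quantum = (2*1500*2)/8 = 750,
   the second (6*1500*2)/8 = 2250; quanta stay proportional to
   weight while summing to nclasses*allot. The overflow warning
   above matters mainly on 32-bit longs, since weight defaults to
   the rate in bytes/sec and can be large. */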
1110
1111static void cbq_sync_defmap(struct cbq_class *cl)
1112{
1113 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1114 struct cbq_class *split = cl->split;
1115 unsigned h;
1116 int i;
1117
1118 if (split == NULL)
1119 return;
1120
1121 for (i=0; i<=TC_PRIO_MAX; i++) {
1122 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1123 split->defaults[i] = NULL;
1124 }
1125
1126 for (i=0; i<=TC_PRIO_MAX; i++) {
1127 int level = split->level;
1128
1129 if (split->defaults[i])
1130 continue;
1131
1132 for (h=0; h<16; h++) {
1133 struct cbq_class *c;
1134
1135 for (c = q->classes[h]; c; c = c->next) {
1136 if (c->split == split && c->level < level &&
1137 c->defmap&(1<<i)) {
1138 split->defaults[i] = c;
1139 level = c->level;
1140 }
1141 }
1142 }
1143 }
1144}
1145
1146static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1147{
1148 struct cbq_class *split = NULL;
1149
1150 if (splitid == 0) {
1151 if ((split = cl->split) == NULL)
1152 return;
1153 splitid = split->classid;
1154 }
1155
1156 if (split == NULL || split->classid != splitid) {
1157 for (split = cl->tparent; split; split = split->tparent)
1158 if (split->classid == splitid)
1159 break;
1160 }
1161
1162 if (split == NULL)
1163 return;
1164
1165 if (cl->split != split) {
1166 cl->defmap = 0;
1167 cbq_sync_defmap(cl);
1168 cl->split = split;
1169 cl->defmap = def&mask;
1170 } else
1171 cl->defmap = (cl->defmap&~mask)|(def&mask);
1172
1173 cbq_sync_defmap(cl);
1174}
1175
1176static void cbq_unlink_class(struct cbq_class *this)
1177{
1178 struct cbq_class *cl, **clp;
1179 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1180
1181 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1182 if (cl == this) {
1183 *clp = cl->next;
1184 cl->next = NULL;
1185 break;
1186 }
1187 }
1188
1189 if (this->tparent) {
1190 clp=&this->sibling;
1191 cl = *clp;
1192 do {
1193 if (cl == this) {
1194 *clp = cl->sibling;
1195 break;
1196 }
1197 clp = &cl->sibling;
1198 } while ((cl = *clp) != this->sibling);
1199
1200 if (this->tparent->children == this) {
1201 this->tparent->children = this->sibling;
1202 if (this->sibling == this)
1203 this->tparent->children = NULL;
1204 }
1205 } else {
1206 BUG_TRAP(this->sibling == this);
1207 }
1208}
1209
1210static void cbq_link_class(struct cbq_class *this)
1211{
1212 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1213 unsigned h = cbq_hash(this->classid);
1214 struct cbq_class *parent = this->tparent;
1215
1216 this->sibling = this;
1217 this->next = q->classes[h];
1218 q->classes[h] = this;
1219
1220 if (parent == NULL)
1221 return;
1222
1223 if (parent->children == NULL) {
1224 parent->children = this;
1225 } else {
1226 this->sibling = parent->children->sibling;
1227 parent->children->sibling = this;
1228 }
1229}
1230
1231static unsigned int cbq_drop(struct Qdisc* sch)
1232{
1233 struct cbq_sched_data *q = qdisc_priv(sch);
1234 struct cbq_class *cl, *cl_head;
1235 int prio;
1236 unsigned int len;
1237
1238 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1239 if ((cl_head = q->active[prio]) == NULL)
1240 continue;
1241
1242 cl = cl_head;
1243 do {
1244 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1245 sch->q.qlen--;
1246				if (!cl->q->q.qlen)
1247 cbq_deactivate_class(cl);
1248				return len;
1249 }
1250 } while ((cl = cl->next_alive) != cl_head);
1251 }
1252 return 0;
1253}
1254
1255static void
1256cbq_reset(struct Qdisc* sch)
1257{
1258 struct cbq_sched_data *q = qdisc_priv(sch);
1259 struct cbq_class *cl;
1260 int prio;
1261 unsigned h;
1262
1263 q->activemask = 0;
1264 q->pmask = 0;
1265 q->tx_class = NULL;
1266 q->tx_borrowed = NULL;
1267	qdisc_watchdog_cancel(&q->watchdog);
1268	del_timer(&q->delay_timer);
1269 q->toplevel = TC_CBQ_MAXLEVEL;
1270 PSCHED_GET_TIME(q->now);
1271 q->now_rt = q->now;
1272
1273 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1274 q->active[prio] = NULL;
1275
1276 for (h = 0; h < 16; h++) {
1277 for (cl = q->classes[h]; cl; cl = cl->next) {
1278 qdisc_reset(cl->q);
1279
1280 cl->next_alive = NULL;
1281 PSCHED_SET_PASTPERFECT(cl->undertime);
1282 cl->avgidle = cl->maxidle;
1283 cl->deficit = cl->quantum;
1284 cl->cpriority = cl->priority;
1285 }
1286 }
1287 sch->q.qlen = 0;
1288}
1289
1290
1291static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1292{
1293 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1294 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1295 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1296 }
1297 if (lss->change&TCF_CBQ_LSS_EWMA)
1298 cl->ewma_log = lss->ewma_log;
1299 if (lss->change&TCF_CBQ_LSS_AVPKT)
1300 cl->avpkt = lss->avpkt;
1301 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1302 cl->minidle = -(long)lss->minidle;
1303 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1304 cl->maxidle = lss->maxidle;
1305 cl->avgidle = lss->maxidle;
1306 }
1307 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1308 cl->offtime = lss->offtime;
1309 return 0;
1310}
1311
1312static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1313{
1314 q->nclasses[cl->priority]--;
1315 q->quanta[cl->priority] -= cl->weight;
1316 cbq_normalize_quanta(q, cl->priority);
1317}
1318
1319static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1320{
1321 q->nclasses[cl->priority]++;
1322 q->quanta[cl->priority] += cl->weight;
1323 cbq_normalize_quanta(q, cl->priority);
1324}
1325
1326static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1327{
1328 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1329
1330 if (wrr->allot)
1331 cl->allot = wrr->allot;
1332 if (wrr->weight)
1333 cl->weight = wrr->weight;
1334 if (wrr->priority) {
1335 cl->priority = wrr->priority-1;
1336 cl->cpriority = cl->priority;
1337 if (cl->priority >= cl->priority2)
1338 cl->priority2 = TC_CBQ_MAXPRIO-1;
1339 }
1340
1341 cbq_addprio(q, cl);
1342 return 0;
1343}
1344
1345static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1346{
1347 switch (ovl->strategy) {
1348 case TC_CBQ_OVL_CLASSIC:
1349 cl->overlimit = cbq_ovl_classic;
1350 break;
1351 case TC_CBQ_OVL_DELAY:
1352 cl->overlimit = cbq_ovl_delay;
1353 break;
1354 case TC_CBQ_OVL_LOWPRIO:
1355 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1356 ovl->priority2-1 <= cl->priority)
1357 return -EINVAL;
1358 cl->priority2 = ovl->priority2-1;
1359 cl->overlimit = cbq_ovl_lowprio;
1360 break;
1361 case TC_CBQ_OVL_DROP:
1362 cl->overlimit = cbq_ovl_drop;
1363 break;
1364 case TC_CBQ_OVL_RCLASSIC:
1365 cl->overlimit = cbq_ovl_rclassic;
1366 break;
1367 default:
1368 return -EINVAL;
1369 }
1370 cl->penalty = (ovl->penalty*HZ)/1000;
1371 return 0;
1372}
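/* The conversion in cbq_set_overlimit() above apparently treats
   ovl->penalty as a value in milliseconds and rounds it to jiffies.
   Illustration: with HZ == 1000 a penalty of 300 becomes 300
   jiffies (300 ms); with HZ == 250 it becomes 75 jiffies.
   cl->penalty is later added to jiffies in cbq_ovl_delay() and
   cbq_ovl_lowprio(). */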
1373
1374#ifdef CONFIG_NET_CLS_POLICE
1375static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1376{
1377 cl->police = p->police;
1378
1379 if (cl->q->handle) {
1380 if (p->police == TC_POLICE_RECLASSIFY)
1381 cl->q->reshape_fail = cbq_reshape_fail;
1382 else
1383 cl->q->reshape_fail = NULL;
1384 }
1385 return 0;
1386}
1387#endif
1388
1389static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1390{
1391 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1392 return 0;
1393}
1394
1395static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1396{
1397 struct cbq_sched_data *q = qdisc_priv(sch);
1398 struct rtattr *tb[TCA_CBQ_MAX];
1399 struct tc_ratespec *r;
1400
1401 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1402 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1403 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1404 return -EINVAL;
1405
1406 if (tb[TCA_CBQ_LSSOPT-1] &&
1407 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1408 return -EINVAL;
1409
1410 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1411
1412 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1413 return -EINVAL;
1414
1415 q->link.refcnt = 1;
1416 q->link.sibling = &q->link;
1417 q->link.classid = sch->handle;
1418 q->link.qdisc = sch;
1419	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1420 sch->handle)))
1421		q->link.q = &noop_qdisc;
1422
1423 q->link.priority = TC_CBQ_MAXPRIO-1;
1424 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1425 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1426 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1427 q->link.overlimit = cbq_ovl_classic;
1428 q->link.allot = psched_mtu(sch->dev);
1429 q->link.quantum = q->link.allot;
1430 q->link.weight = q->link.R_tab->rate.rate;
1431
1432 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1433 q->link.avpkt = q->link.allot/2;
1434 q->link.minidle = -0x7FFFFFFF;
1435 q->link.stats_lock = &sch->dev->queue_lock;
1436
1437	qdisc_watchdog_init(&q->watchdog, sch);
1438	init_timer(&q->delay_timer);
1439 q->delay_timer.data = (unsigned long)sch;
1440 q->delay_timer.function = cbq_undelay;
1441 q->toplevel = TC_CBQ_MAXLEVEL;
1442 PSCHED_GET_TIME(q->now);
1443 q->now_rt = q->now;
1444
1445 cbq_link_class(&q->link);
1446
1447 if (tb[TCA_CBQ_LSSOPT-1])
1448 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1449
1450 cbq_addprio(q, &q->link);
1451 return 0;
1452}
1453
1454static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1455{
1456 unsigned char *b = skb->tail;
1457
1458 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1459 return skb->len;
1460
1461rtattr_failure:
1462 skb_trim(skb, b - skb->data);
1463 return -1;
1464}
1465
1466static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1467{
1468 unsigned char *b = skb->tail;
1469 struct tc_cbq_lssopt opt;
1470
1471 opt.flags = 0;
1472 if (cl->borrow == NULL)
1473 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1474 if (cl->share == NULL)
1475 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1476 opt.ewma_log = cl->ewma_log;
1477 opt.level = cl->level;
1478 opt.avpkt = cl->avpkt;
1479 opt.maxidle = cl->maxidle;
1480 opt.minidle = (u32)(-cl->minidle);
1481 opt.offtime = cl->offtime;
1482 opt.change = ~0;
1483 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1484 return skb->len;
1485
1486rtattr_failure:
1487 skb_trim(skb, b - skb->data);
1488 return -1;
1489}
1490
1491static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1492{
1493 unsigned char *b = skb->tail;
1494 struct tc_cbq_wrropt opt;
1495
1496 opt.flags = 0;
1497 opt.allot = cl->allot;
1498 opt.priority = cl->priority+1;
1499 opt.cpriority = cl->cpriority+1;
1500 opt.weight = cl->weight;
1501 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1502 return skb->len;
1503
1504rtattr_failure:
1505 skb_trim(skb, b - skb->data);
1506 return -1;
1507}
1508
1509static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1510{
1511 unsigned char *b = skb->tail;
1512 struct tc_cbq_ovl opt;
1513
1514 opt.strategy = cl->ovl_strategy;
1515 opt.priority2 = cl->priority2+1;
1516	opt.pad = 0;
1517	opt.penalty = (cl->penalty*1000)/HZ;
1518 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1519 return skb->len;
1520
1521rtattr_failure:
1522 skb_trim(skb, b - skb->data);
1523 return -1;
1524}
1525
1526static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1527{
1528 unsigned char *b = skb->tail;
1529 struct tc_cbq_fopt opt;
1530
1531 if (cl->split || cl->defmap) {
1532 opt.split = cl->split ? cl->split->classid : 0;
1533 opt.defmap = cl->defmap;
1534 opt.defchange = ~0;
1535 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1536 }
1537 return skb->len;
1538
1539rtattr_failure:
1540 skb_trim(skb, b - skb->data);
1541 return -1;
1542}
1543
1544#ifdef CONFIG_NET_CLS_POLICE
1545static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1546{
1547 unsigned char *b = skb->tail;
1548 struct tc_cbq_police opt;
1549
1550 if (cl->police) {
1551 opt.police = cl->police;
1552		opt.__res1 = 0;
1553 opt.__res2 = 0;
1554		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1555 }
1556 return skb->len;
1557
1558rtattr_failure:
1559 skb_trim(skb, b - skb->data);
1560 return -1;
1561}
1562#endif
1563
1564static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565{
1566 if (cbq_dump_lss(skb, cl) < 0 ||
1567 cbq_dump_rate(skb, cl) < 0 ||
1568 cbq_dump_wrr(skb, cl) < 0 ||
1569 cbq_dump_ovl(skb, cl) < 0 ||
1570#ifdef CONFIG_NET_CLS_POLICE
1571 cbq_dump_police(skb, cl) < 0 ||
1572#endif
1573 cbq_dump_fopt(skb, cl) < 0)
1574 return -1;
1575 return 0;
1576}
1577
1578static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1579{
1580 struct cbq_sched_data *q = qdisc_priv(sch);
1581 unsigned char *b = skb->tail;
1582 struct rtattr *rta;
1583
1584 rta = (struct rtattr*)b;
1585 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1586 if (cbq_dump_attr(skb, &q->link) < 0)
1587 goto rtattr_failure;
1588 rta->rta_len = skb->tail - b;
1589 return skb->len;
1590
1591rtattr_failure:
1592 skb_trim(skb, b - skb->data);
1593 return -1;
1594}
1595
1596static int
1597cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1598{
1599 struct cbq_sched_data *q = qdisc_priv(sch);
1600
1601 q->link.xstats.avgidle = q->link.avgidle;
1602 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1603}
1604
1605static int
1606cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1607 struct sk_buff *skb, struct tcmsg *tcm)
1608{
1609 struct cbq_class *cl = (struct cbq_class*)arg;
1610 unsigned char *b = skb->tail;
1611 struct rtattr *rta;
1612
1613 if (cl->tparent)
1614 tcm->tcm_parent = cl->tparent->classid;
1615 else
1616 tcm->tcm_parent = TC_H_ROOT;
1617 tcm->tcm_handle = cl->classid;
1618 tcm->tcm_info = cl->q->handle;
1619
1620 rta = (struct rtattr*)b;
1621 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1622 if (cbq_dump_attr(skb, cl) < 0)
1623 goto rtattr_failure;
1624 rta->rta_len = skb->tail - b;
1625 return skb->len;
1626
1627rtattr_failure:
1628 skb_trim(skb, b - skb->data);
1629 return -1;
1630}
1631
1632static int
1633cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1634 struct gnet_dump *d)
1635{
1636 struct cbq_sched_data *q = qdisc_priv(sch);
1637 struct cbq_class *cl = (struct cbq_class*)arg;
1638
1639 cl->qstats.qlen = cl->q->q.qlen;
1640 cl->xstats.avgidle = cl->avgidle;
1641 cl->xstats.undertime = 0;
1642
1643 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1644 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1645
1646 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1647#ifdef CONFIG_NET_ESTIMATOR
1648 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1649#endif
1650 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1651 return -1;
1652
1653 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1654}
1655
1656static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1657 struct Qdisc **old)
1658{
1659 struct cbq_class *cl = (struct cbq_class*)arg;
1660
1661 if (cl) {
1662 if (new == NULL) {
1663			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1664 cl->classid)) == NULL)
1665				return -ENOBUFS;
1666 } else {
1667#ifdef CONFIG_NET_CLS_POLICE
1668 if (cl->police == TC_POLICE_RECLASSIFY)
1669 new->reshape_fail = cbq_reshape_fail;
1670#endif
1671 }
1672 sch_tree_lock(sch);
1673		*old = xchg(&cl->q, new);
1674		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1675		qdisc_reset(*old);
1676 sch_tree_unlock(sch);
1677
1678 return 0;
1679 }
1680 return -ENOENT;
1681}
1682
1683static struct Qdisc *
1684cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685{
1686 struct cbq_class *cl = (struct cbq_class*)arg;
1687
1688 return cl ? cl->q : NULL;
1689}
1690
1691static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1692{
1693 struct cbq_class *cl = (struct cbq_class *)arg;
1694
1695 if (cl->q->q.qlen == 0)
1696 cbq_deactivate_class(cl);
1697}
1698
1699static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1700{
1701 struct cbq_sched_data *q = qdisc_priv(sch);
1702 struct cbq_class *cl = cbq_class_lookup(q, classid);
1703
1704 if (cl) {
1705 cl->refcnt++;
1706 return (unsigned long)cl;
1707 }
1708 return 0;
1709}
1710
1711static void cbq_destroy_filters(struct cbq_class *cl)
1712{
1713 struct tcf_proto *tp;
1714
1715 while ((tp = cl->filter_list) != NULL) {
1716 cl->filter_list = tp->next;
1717 tcf_destroy(tp);
1718 }
1719}
1720
1721static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1722{
1723 struct cbq_sched_data *q = qdisc_priv(sch);
1724
1725 BUG_TRAP(!cl->filters);
1726
1727 cbq_destroy_filters(cl);
1728 qdisc_destroy(cl->q);
1729 qdisc_put_rtab(cl->R_tab);
1730#ifdef CONFIG_NET_ESTIMATOR
1731 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1732#endif
1733 if (cl != &q->link)
1734 kfree(cl);
1735}
1736
1737static void
1738cbq_destroy(struct Qdisc* sch)
1739{
1740 struct cbq_sched_data *q = qdisc_priv(sch);
1741 struct cbq_class *cl;
1742 unsigned h;
1743
1744#ifdef CONFIG_NET_CLS_POLICE
1745 q->rx_class = NULL;
1746#endif
1747 /*
1748 * Filters must be destroyed first because we don't destroy the
1749 * classes from root to leafs which means that filters can still
1750 * be bound to classes which have been destroyed already. --TGR '04
1751 */
1752 for (h = 0; h < 16; h++)
1753 for (cl = q->classes[h]; cl; cl = cl->next)
1754 cbq_destroy_filters(cl);
1755
1756 for (h = 0; h < 16; h++) {
1757 struct cbq_class *next;
1758
1759 for (cl = q->classes[h]; cl; cl = next) {
1760 next = cl->next;
1761 cbq_destroy_class(sch, cl);
1762 }
1763 }
1764}
1765
1766static void cbq_put(struct Qdisc *sch, unsigned long arg)
1767{
1768 struct cbq_class *cl = (struct cbq_class*)arg;
1769
1770 if (--cl->refcnt == 0) {
1771#ifdef CONFIG_NET_CLS_POLICE
1772 struct cbq_sched_data *q = qdisc_priv(sch);
1773
1774 spin_lock_bh(&sch->dev->queue_lock);
1775 if (q->rx_class == cl)
1776 q->rx_class = NULL;
1777 spin_unlock_bh(&sch->dev->queue_lock);
1778#endif
1779
1780 cbq_destroy_class(sch, cl);
1781 }
1782}
1783
1784static int
1785cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1786 unsigned long *arg)
1787{
1788 int err;
1789 struct cbq_sched_data *q = qdisc_priv(sch);
1790 struct cbq_class *cl = (struct cbq_class*)*arg;
1791 struct rtattr *opt = tca[TCA_OPTIONS-1];
1792 struct rtattr *tb[TCA_CBQ_MAX];
1793 struct cbq_class *parent;
1794 struct qdisc_rate_table *rtab = NULL;
1795
1796 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1797 return -EINVAL;
1798
1799 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1800 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1801 return -EINVAL;
1802
1803 if (tb[TCA_CBQ_FOPT-1] &&
1804 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1805 return -EINVAL;
1806
1807 if (tb[TCA_CBQ_RATE-1] &&
1808 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1809 return -EINVAL;
1810
1811 if (tb[TCA_CBQ_LSSOPT-1] &&
1812 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1813 return -EINVAL;
1814
1815 if (tb[TCA_CBQ_WRROPT-1] &&
1816 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1817 return -EINVAL;
1818
1819#ifdef CONFIG_NET_CLS_POLICE
1820 if (tb[TCA_CBQ_POLICE-1] &&
1821 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1822 return -EINVAL;
1823#endif
1824
1825 if (cl) {
1826 /* Check parent */
1827 if (parentid) {
1828 if (cl->tparent && cl->tparent->classid != parentid)
1829 return -EINVAL;
1830 if (!cl->tparent && parentid != TC_H_ROOT)
1831 return -EINVAL;
1832 }
1833
1834 if (tb[TCA_CBQ_RATE-1]) {
1835 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1836 if (rtab == NULL)
1837 return -EINVAL;
1838 }
1839
1840 /* Change class parameters */
1841 sch_tree_lock(sch);
1842
1843 if (cl->next_alive != NULL)
1844 cbq_deactivate_class(cl);
1845
1846 if (rtab) {
1847 rtab = xchg(&cl->R_tab, rtab);
1848 qdisc_put_rtab(rtab);
1849 }
1850
1851 if (tb[TCA_CBQ_LSSOPT-1])
1852 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1853
1854 if (tb[TCA_CBQ_WRROPT-1]) {
1855 cbq_rmprio(q, cl);
1856 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1857 }
1858
1859 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1860 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1861
1862#ifdef CONFIG_NET_CLS_POLICE
1863 if (tb[TCA_CBQ_POLICE-1])
1864 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1865#endif
1866
1867 if (tb[TCA_CBQ_FOPT-1])
1868 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1869
1870 if (cl->q->q.qlen)
1871 cbq_activate_class(cl);
1872
1873 sch_tree_unlock(sch);
1874
1875#ifdef CONFIG_NET_ESTIMATOR
1876 if (tca[TCA_RATE-1])
1877 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1878 cl->stats_lock, tca[TCA_RATE-1]);
1879#endif
1880 return 0;
1881 }
1882
1883 if (parentid == TC_H_ROOT)
1884 return -EINVAL;
1885
1886 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1887 tb[TCA_CBQ_LSSOPT-1] == NULL)
1888 return -EINVAL;
1889
1890 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1891 if (rtab == NULL)
1892 return -EINVAL;
1893
1894 if (classid) {
1895 err = -EINVAL;
1896 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1897 goto failure;
1898 } else {
1899 int i;
1900 classid = TC_H_MAKE(sch->handle,0x8000);
1901
1902 for (i=0; i<0x8000; i++) {
1903 if (++q->hgenerator >= 0x8000)
1904 q->hgenerator = 1;
1905 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1906 break;
1907 }
1908 err = -ENOSR;
1909 if (i >= 0x8000)
1910 goto failure;
1911 classid = classid|q->hgenerator;
1912 }
1913
1914 parent = &q->link;
1915 if (parentid) {
1916 parent = cbq_class_lookup(q, parentid);
1917 err = -EINVAL;
1918 if (parent == NULL)
1919 goto failure;
1920 }
1921
1922 err = -ENOBUFS;
1923	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1924	if (cl == NULL)
1925 goto failure;
1926	cl->R_tab = rtab;
1927 rtab = NULL;
1928 cl->refcnt = 1;
1929	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1930		cl->q = &noop_qdisc;
1931 cl->classid = classid;
1932 cl->tparent = parent;
1933 cl->qdisc = sch;
1934 cl->allot = parent->allot;
1935 cl->quantum = cl->allot;
1936 cl->weight = cl->R_tab->rate.rate;
1937 cl->stats_lock = &sch->dev->queue_lock;
1938
1939 sch_tree_lock(sch);
1940 cbq_link_class(cl);
1941 cl->borrow = cl->tparent;
1942 if (cl->tparent != &q->link)
1943 cl->share = cl->tparent;
1944 cbq_adjust_levels(parent);
1945 cl->minidle = -0x7FFFFFFF;
1946 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1947 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1948 if (cl->ewma_log==0)
1949 cl->ewma_log = q->link.ewma_log;
1950 if (cl->maxidle==0)
1951 cl->maxidle = q->link.maxidle;
1952 if (cl->avpkt==0)
1953 cl->avpkt = q->link.avpkt;
1954 cl->overlimit = cbq_ovl_classic;
1955 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1956 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1957#ifdef CONFIG_NET_CLS_POLICE
1958 if (tb[TCA_CBQ_POLICE-1])
1959 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1960#endif
1961 if (tb[TCA_CBQ_FOPT-1])
1962 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1963 sch_tree_unlock(sch);
1964
1965#ifdef CONFIG_NET_ESTIMATOR
1966 if (tca[TCA_RATE-1])
1967 gen_new_estimator(&cl->bstats, &cl->rate_est,
1968 cl->stats_lock, tca[TCA_RATE-1]);
1969#endif
1970
1971 *arg = (unsigned long)cl;
1972 return 0;
1973
1974failure:
1975 qdisc_put_rtab(rtab);
1976 return err;
1977}
1978
1979static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1980{
1981 struct cbq_sched_data *q = qdisc_priv(sch);
1982 struct cbq_class *cl = (struct cbq_class*)arg;
1983	unsigned int qlen;
1984
1985 if (cl->filters || cl->children || cl == &q->link)
1986 return -EBUSY;
1987
1988 sch_tree_lock(sch);
1989
1990	qlen = cl->q->q.qlen;
1991 qdisc_reset(cl->q);
1992 qdisc_tree_decrease_qlen(cl->q, qlen);
1993
1994	if (cl->next_alive)
1995 cbq_deactivate_class(cl);
1996
1997 if (q->tx_borrowed == cl)
1998 q->tx_borrowed = q->tx_class;
1999 if (q->tx_class == cl) {
2000 q->tx_class = NULL;
2001 q->tx_borrowed = NULL;
2002 }
2003#ifdef CONFIG_NET_CLS_POLICE
2004 if (q->rx_class == cl)
2005 q->rx_class = NULL;
2006#endif
2007
2008 cbq_unlink_class(cl);
2009 cbq_adjust_levels(cl->tparent);
2010 cl->defmap = 0;
2011 cbq_sync_defmap(cl);
2012
2013 cbq_rmprio(q, cl);
2014 sch_tree_unlock(sch);
2015
2016 if (--cl->refcnt == 0)
2017 cbq_destroy_class(sch, cl);
2018
2019 return 0;
2020}
2021
2022static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2023{
2024 struct cbq_sched_data *q = qdisc_priv(sch);
2025 struct cbq_class *cl = (struct cbq_class *)arg;
2026
2027 if (cl == NULL)
2028 cl = &q->link;
2029
2030 return &cl->filter_list;
2031}
2032
2033static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2034 u32 classid)
2035{
2036 struct cbq_sched_data *q = qdisc_priv(sch);
2037 struct cbq_class *p = (struct cbq_class*)parent;
2038 struct cbq_class *cl = cbq_class_lookup(q, classid);
2039
2040 if (cl) {
2041 if (p && p->level <= cl->level)
2042 return 0;
2043 cl->filters++;
2044 return (unsigned long)cl;
2045 }
2046 return 0;
2047}
2048
2049static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2050{
2051 struct cbq_class *cl = (struct cbq_class*)arg;
2052
2053 cl->filters--;
2054}
2055
2056static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2057{
2058 struct cbq_sched_data *q = qdisc_priv(sch);
2059 unsigned h;
2060
2061 if (arg->stop)
2062 return;
2063
2064 for (h = 0; h < 16; h++) {
2065 struct cbq_class *cl;
2066
2067 for (cl = q->classes[h]; cl; cl = cl->next) {
2068 if (arg->count < arg->skip) {
2069 arg->count++;
2070 continue;
2071 }
2072 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2073 arg->stop = 1;
2074 return;
2075 }
2076 arg->count++;
2077 }
2078 }
2079}
2080
2081static struct Qdisc_class_ops cbq_class_ops = {
2082 .graft = cbq_graft,
2083 .leaf = cbq_leaf,
2084	.qlen_notify	=	cbq_qlen_notify,
2085	.get		=	cbq_get,
2086 .put = cbq_put,
2087 .change = cbq_change_class,
2088 .delete = cbq_delete,
2089 .walk = cbq_walk,
2090 .tcf_chain = cbq_find_tcf,
2091 .bind_tcf = cbq_bind_filter,
2092 .unbind_tcf = cbq_unbind_filter,
2093 .dump = cbq_dump_class,
2094 .dump_stats = cbq_dump_class_stats,
2095};
2096
2097static struct Qdisc_ops cbq_qdisc_ops = {
2098 .next = NULL,
2099 .cl_ops = &cbq_class_ops,
2100 .id = "cbq",
2101 .priv_size = sizeof(struct cbq_sched_data),
2102 .enqueue = cbq_enqueue,
2103 .dequeue = cbq_dequeue,
2104 .requeue = cbq_requeue,
2105 .drop = cbq_drop,
2106 .init = cbq_init,
2107 .reset = cbq_reset,
2108 .destroy = cbq_destroy,
2109 .change = NULL,
2110 .dump = cbq_dump,
2111 .dump_stats = cbq_dump_stats,
2112 .owner = THIS_MODULE,
2113};
2114
2115static int __init cbq_module_init(void)
2116{
2117 return register_qdisc(&cbq_qdisc_ops);
2118}
2119static void __exit cbq_module_exit(void)
2120{
2121 unregister_qdisc(&cbq_qdisc_ops);
2122}
2123module_init(cbq_module_init)
2124module_exit(cbq_module_exit)
2125MODULE_LICENSE("GPL");