1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in.h>
24#include <linux/errno.h>
25#include <linux/interrupt.h>
26#include <linux/if_ether.h>
27#include <linux/inet.h>
28#include <linux/netdevice.h>
29#include <linux/etherdevice.h>
30#include <linux/notifier.h>
31#include <net/ip.h>
32#include <net/netlink.h>
33#include <net/route.h>
34#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h>
37
38
39/* Class-Based Queueing (CBQ) algorithm.
40 =======================================
41
42 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43 Management Models for Packet Networks",
44 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49 Parameters", 1996
50
51 [4] Sally Floyd and Michael Speer, "Experimental Results
52 for Class-Based Queueing", 1998, not published.
53
54 -----------------------------------------------------------------------
55
56 Algorithm skeleton was taken from NS simulator cbq.cc.
57 If someone wants to check this code against the LBL version,
58 he should take into account that ONLY the skeleton was borrowed,
59 the implementation is different. Particularly:
60
61 --- The WRR algorithm is different. Our version looks more
62 reasonable (I hope) and works when quanta are allowed to be
63 less than MTU, which is always the case when real time classes
64 have small rates. Note, that the statement of [3] is
65 incomplete, delay may actually be estimated even if class
66 per-round allotment is less than MTU. Namely, if per-round
67 allotment is W*r_i, and r_1+...+r_k = r < 1
68
69 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71 In the worst case we have IntServ estimate with D = W*r+k*MTU
72 and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76 interpret some places, which look like wrong translations
77 from NS. Anyone is advised to find these differences
78 and explain to me, why I am wrong 8).
79
80 --- Linux has no EOI event, so that we cannot estimate true class
81 idle time. Workaround is to consider the next dequeue event
82 as a sign that the previous packet has finished. This is wrong because of
83 internal device queueing, but on a permanently loaded link it is true.
84 Moreover, combined with clock integrator, this scheme looks
85 very close to an ideal solution. */
86
87struct cbq_sched_data;
88
89
90struct cbq_class
91{
92 struct cbq_class *next; /* hash table link */
93 struct cbq_class *next_alive; /* next class with backlog in this priority band */
94
95/* Parameters */
96 u32 classid;
97 unsigned char priority; /* class priority */
98 unsigned char priority2; /* priority to be used after overlimit */
99 unsigned char ewma_log; /* time constant for idle time calculation */
100 unsigned char ovl_strategy;
101#ifdef CONFIG_NET_CLS_POLICE
102 unsigned char police;
103#endif
104
105 u32 defmap;
106
107 /* Link-sharing scheduler parameters */
108 long maxidle; /* Class parameters: see below. */
109 long offtime;
110 long minidle;
111 u32 avpkt;
112 struct qdisc_rate_table *R_tab;
113
114 /* Overlimit strategy parameters */
115 void (*overlimit)(struct cbq_class *cl);
116 psched_tdiff_t penalty;
117
118 /* General scheduler (WRR) parameters */
119 long allot;
120 long quantum; /* Allotment per WRR round */
121 long weight; /* Relative allotment: see below */
122
123 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
124 struct cbq_class *split; /* Ptr to split node */
125 struct cbq_class *share; /* Ptr to LS parent in the class tree */
126 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
127 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
128 parent otherwise */
129 struct cbq_class *sibling; /* Sibling chain */
130 struct cbq_class *children; /* Pointer to children chain */
131
132 struct Qdisc *q; /* Elementary queueing discipline */
133
134
135/* Variables */
136 unsigned char cpriority; /* Effective priority */
137 unsigned char delayed;
138 unsigned char level; /* level of the class in hierarchy:
139 0 for leaf classes, and maximal
140 level of children + 1 for nodes.
141 */
142
143 psched_time_t last; /* Last end of service */
144 psched_time_t undertime;
145 long avgidle;
146 long deficit; /* Saved deficit for WRR */
147 psched_time_t penalized;
148 struct gnet_stats_basic bstats;
149 struct gnet_stats_queue qstats;
150 struct gnet_stats_rate_est rate_est;
151 spinlock_t *stats_lock;
152 struct tc_cbq_xstats xstats;
153
154 struct tcf_proto *filter_list;
155
156 int refcnt;
157 int filters;
158
159 struct cbq_class *defaults[TC_PRIO_MAX+1];
160};
161
162struct cbq_sched_data
163{
164 struct cbq_class *classes[16]; /* Hash table of all classes */
165 int nclasses[TC_CBQ_MAXPRIO+1];
166 unsigned quanta[TC_CBQ_MAXPRIO+1];
167
168 struct cbq_class link;
169
170 unsigned activemask;
171 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 with backlog */
173
174#ifdef CONFIG_NET_CLS_POLICE
175 struct cbq_class *rx_class;
176#endif
177 struct cbq_class *tx_class;
178 struct cbq_class *tx_borrowed;
179 int tx_len;
180 psched_time_t now; /* Cached timestamp */
181 psched_time_t now_rt; /* Cached real time */
182 unsigned pmask;
183
184 struct hrtimer delay_timer;
185 struct qdisc_watchdog watchdog; /* Watchdog timer,
186 started when CBQ has
187 backlog, but cannot
188 transmit just now */
189 psched_tdiff_t wd_expires;
190 int toplevel;
191 u32 hgenerator;
192};
193
194
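/* L2T ("length to time"): transmission time of a packet of the given
   length at this class's configured rate, looked up in the precomputed
   rate table (indexed by length >> cell_log). */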
195#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
198static __inline__ unsigned cbq_hash(u32 h)
199{
200 h ^= h>>8;
201 h ^= h>>4;
202 return h&0xF;
203}
204
205static __inline__ struct cbq_class *
206cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207{
208 struct cbq_class *cl;
209
210 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211 if (cl->classid == classid)
212 return cl;
213 return NULL;
214}
215
216#ifdef CONFIG_NET_CLS_POLICE
217
218static struct cbq_class *
219cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220{
221 struct cbq_class *cl, *new;
222
223 for (cl = this->tparent; cl; cl = cl->tparent)
224 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225 return new;
226
227 return NULL;
228}
229
230#endif
231
232/* Classify packet. The procedure is pretty complicated, but
233 it allows us to combine link sharing and priority scheduling
234 transparently.
235
236 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
237 so that it resolves to split nodes. Then packets are classified
238 by logical priority, or a more specific classifier may be attached
239 to the split node.
240 */
241
242static struct cbq_class *
243cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244{
245 struct cbq_sched_data *q = qdisc_priv(sch);
246 struct cbq_class *head = &q->link;
247 struct cbq_class **defmap;
248 struct cbq_class *cl = NULL;
249 u32 prio = skb->priority;
250 struct tcf_result res;
251
252 /*
253 * Step 1. If skb->priority points to one of our classes, use it.
254 */
255 if (TC_H_MAJ(prio^sch->handle) == 0 &&
256 (cl = cbq_class_lookup(q, prio)) != NULL)
257 return cl;
258
259 *qerr = NET_XMIT_BYPASS;
260 for (;;) {
261 int result = 0;
262 defmap = head->defaults;
263
264 /*
265 * Step 2+n. Apply classifier.
266 */
267 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 goto fallback;
269
270 if ((cl = (void*)res.class) == NULL) {
271 if (TC_H_MAJ(res.classid))
272 cl = cbq_class_lookup(q, res.classid);
273 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274 cl = defmap[TC_PRIO_BESTEFFORT];
275
276 if (cl == NULL || cl->level >= head->level)
277 goto fallback;
278 }
279
280#ifdef CONFIG_NET_CLS_ACT
281 switch (result) {
282 case TC_ACT_QUEUED:
283 case TC_ACT_STOLEN:
284 *qerr = NET_XMIT_SUCCESS;
285 case TC_ACT_SHOT:
286 return NULL;
287 }
288#elif defined(CONFIG_NET_CLS_POLICE)
289 switch (result) {
290 case TC_POLICE_RECLASSIFY:
291 return cbq_reclassify(skb, cl);
292 case TC_POLICE_SHOT:
293 return NULL;
294 default:
295 break;
296 }
297#endif
298 if (cl->level == 0)
299 return cl;
300
301 /*
302 * Step 3+n. If classifier selected a link sharing class,
303 * apply agency specific classifier.
304 * Repeat this procedure until we hit a leaf node.
305 */
306 head = cl;
307 }
308
309fallback:
310 cl = head;
311
312 /*
313 * Step 4. No success...
314 */
315 if (TC_H_MAJ(prio) == 0 &&
316 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318 return head;
319
320 return cl;
321}
322
323/*
324 A packet has just been enqueued on the empty class.
325 cbq_activate_class adds it to the tail of active class list
326 of its priority band.
327 */
328
329static __inline__ void cbq_activate_class(struct cbq_class *cl)
330{
331 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332 int prio = cl->cpriority;
333 struct cbq_class *cl_tail;
334
335 cl_tail = q->active[prio];
336 q->active[prio] = cl;
337
338 if (cl_tail != NULL) {
339 cl->next_alive = cl_tail->next_alive;
340 cl_tail->next_alive = cl;
341 } else {
342 cl->next_alive = cl;
343 q->activemask |= (1<<prio);
344 }
345}
346
347/*
348 Unlink class from active chain.
349 Note that this same procedure is done directly in cbq_dequeue*
350 during round-robin procedure.
351 */
352
353static void cbq_deactivate_class(struct cbq_class *this)
354{
355 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356 int prio = this->cpriority;
357 struct cbq_class *cl;
358 struct cbq_class *cl_prev = q->active[prio];
359
360 do {
361 cl = cl_prev->next_alive;
362 if (cl == this) {
363 cl_prev->next_alive = cl->next_alive;
364 cl->next_alive = NULL;
365
366 if (cl == q->active[prio]) {
367 q->active[prio] = cl_prev;
368 if (cl == q->active[prio]) {
369 q->active[prio] = NULL;
370 q->activemask &= ~(1<<prio);
371 return;
372 }
373 }
374 return;
375 }
376 } while ((cl_prev = cl) != q->active[prio]);
377}
378
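/* Lower the top-level borrowing cutoff (q->toplevel) to the level of the
   first under-limit class found on this class's borrow chain; part of the
   top-level link-sharing guideline. */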
379static void
380cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381{
382 int toplevel = q->toplevel;
383
384 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385 psched_time_t now;
386 psched_tdiff_t incr;
387
388 now = psched_get_time();
389 incr = now - q->now_rt;
390 now = q->now + incr;
391
392 do {
393 if (cl->undertime < now) {
394 q->toplevel = cl->level;
395 return;
396 }
397 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398 }
399}
400
401static int
402cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct cbq_sched_data *q = qdisc_priv(sch);
405 int len = skb->len;
406 int ret;
407 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409#ifdef CONFIG_NET_CLS_POLICE
410 q->rx_class = cl;
411#endif
412 if (cl == NULL) {
413 if (ret == NET_XMIT_BYPASS)
414 sch->qstats.drops++;
415 kfree_skb(skb);
416 return ret;
417 }
418
419#ifdef CONFIG_NET_CLS_POLICE
420 cl->q->__parent = sch;
421#endif
422 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423 sch->q.qlen++;
424 sch->bstats.packets++;
425 sch->bstats.bytes+=len;
426 cbq_mark_toplevel(q, cl);
427 if (!cl->next_alive)
428 cbq_activate_class(cl);
429 return ret;
430 }
431
432 sch->qstats.drops++;
433 cbq_mark_toplevel(q, cl);
434 cl->qstats.drops++;
435 return ret;
436}
437
438static int
439cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440{
441 struct cbq_sched_data *q = qdisc_priv(sch);
442 struct cbq_class *cl;
443 int ret;
444
445 if ((cl = q->tx_class) == NULL) {
446 kfree_skb(skb);
447 sch->qstats.drops++;
448 return NET_XMIT_CN;
449 }
450 q->tx_class = NULL;
451
452 cbq_mark_toplevel(q, cl);
453
454#ifdef CONFIG_NET_CLS_POLICE
455 q->rx_class = cl;
456 cl->q->__parent = sch;
457#endif
458 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459 sch->q.qlen++;
460 sch->qstats.requeues++;
461 if (!cl->next_alive)
462 cbq_activate_class(cl);
463 return 0;
464 }
465 sch->qstats.drops++;
466 cl->qstats.drops++;
467 return ret;
468}
469
470/* Overlimit actions */
471
472/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474static void cbq_ovl_classic(struct cbq_class *cl)
475{
476 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477 psched_tdiff_t delay = cl->undertime - q->now;
478
479 if (!cl->delayed) {
480 delay += cl->offtime;
481
482 /*
483 Class goes to sleep, so that it will have no
484 chance to work avgidle. Let's forgive it 8)
485
486 BTW cbq-2.0 has a bug in this
487 place: apparently they forgot to shift it by cl->ewma_log.
488 */
489 if (cl->avgidle < 0)
490 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491 if (cl->avgidle < cl->minidle)
492 cl->avgidle = cl->minidle;
493 if (delay <= 0)
494 delay = 1;
495 cl->undertime = q->now + delay;
496
497 cl->xstats.overactions++;
498 cl->delayed = 1;
499 }
500 if (q->wd_expires == 0 || q->wd_expires > delay)
501 q->wd_expires = delay;
502
503 /* Dirty work! We must schedule wakeups based on
504 real available rate, rather than leaf rate,
505 which may be tiny (even zero).
506 */
507 if (q->toplevel == TC_CBQ_MAXLEVEL) {
508 struct cbq_class *b;
509 psched_tdiff_t base_delay = q->wd_expires;
510
511 for (b = cl->borrow; b; b = b->borrow) {
512 delay = b->undertime - q->now;
513 if (delay < base_delay) {
514 if (delay <= 0)
515 delay = 1;
516 base_delay = delay;
517 }
518 }
519
520 q->wd_expires = base_delay;
521 }
522}
523
524/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
525 they go overlimit
526 */
527
528static void cbq_ovl_rclassic(struct cbq_class *cl)
529{
530 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531 struct cbq_class *this = cl;
532
533 do {
534 if (cl->level > q->toplevel) {
535 cl = NULL;
536 break;
537 }
538 } while ((cl = cl->borrow) != NULL);
539
540 if (cl == NULL)
541 cl = this;
542 cbq_ovl_classic(cl);
543}
544
545/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
546
547static void cbq_ovl_delay(struct cbq_class *cl)
548{
549 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550 psched_tdiff_t delay = cl->undertime - q->now;
551
552 if (!cl->delayed) {
553 psched_time_t sched = q->now;
554 ktime_t expires;
555
556 delay += cl->offtime;
557 if (cl->avgidle < 0)
558 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559 if (cl->avgidle < cl->minidle)
560 cl->avgidle = cl->minidle;
561 cl->undertime = q->now + delay;
562
563 if (delay > 0) {
564 sched += delay + cl->penalty;
565 cl->penalized = sched;
566 cl->cpriority = TC_CBQ_MAXPRIO;
567 q->pmask |= (1<<TC_CBQ_MAXPRIO);
568
569 expires = ktime_set(0, 0);
570 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571 if (hrtimer_try_to_cancel(&q->delay_timer) &&
572 ktime_to_ns(ktime_sub(q->delay_timer.expires,
573 expires)) > 0)
574 q->delay_timer.expires = expires;
575 hrtimer_restart(&q->delay_timer);
576 cl->delayed = 1;
577 cl->xstats.overactions++;
578 return;
579 }
580 delay = 1;
581 }
582 if (q->wd_expires == 0 || q->wd_expires > delay)
583 q->wd_expires = delay;
584}
585
586/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588static void cbq_ovl_lowprio(struct cbq_class *cl)
589{
590 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
592 cl->penalized = q->now + cl->penalty;
593
594 if (cl->cpriority != cl->priority2) {
595 cl->cpriority = cl->priority2;
596 q->pmask |= (1<<cl->cpriority);
597 cl->xstats.overactions++;
598 }
599 cbq_ovl_classic(cl);
600}
601
602/* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604static void cbq_ovl_drop(struct cbq_class *cl)
605{
606 if (cl->q->ops->drop)
607 if (cl->q->ops->drop(cl->q))
608 cl->qdisc->q.qlen--;
609 cl->xstats.overactions++;
610 cbq_ovl_classic(cl);
611}
612
613static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614 psched_time_t now)
615{
616 struct cbq_class *cl;
617 struct cbq_class *cl_prev = q->active[prio];
618 psched_time_t sched = now;
619
620 if (cl_prev == NULL)
621 return 0;
622
623 do {
624 cl = cl_prev->next_alive;
625 if (now - cl->penalized > 0) {
626 cl_prev->next_alive = cl->next_alive;
627 cl->next_alive = NULL;
628 cl->cpriority = cl->priority;
629 cl->delayed = 0;
630 cbq_activate_class(cl);
631
632 if (cl == q->active[prio]) {
633 q->active[prio] = cl_prev;
634 if (cl == q->active[prio]) {
635 q->active[prio] = NULL;
636 return 0;
637 }
638 }
639
640 cl = cl_prev->next_alive;
641 } else if (sched - cl->penalized > 0)
642 sched = cl->penalized;
643 } while ((cl_prev = cl) != q->active[prio]);
644
645 return sched - now;
646}
647
648static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649{
650 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651 delay_timer);
652 struct Qdisc *sch = q->watchdog.qdisc;
653 psched_time_t now;
654 psched_tdiff_t delay = 0;
655 unsigned pmask;
656
657 now = psched_get_time();
658
659 pmask = q->pmask;
660 q->pmask = 0;
661
662 while (pmask) {
663 int prio = ffz(~pmask);
664 psched_tdiff_t tmp;
665
666 pmask &= ~(1<<prio);
667
668 tmp = cbq_undelay_prio(q, prio, now);
669 if (tmp > 0) {
670 q->pmask |= 1<<prio;
671 if (tmp < delay || delay == 0)
672 delay = tmp;
673 }
674 }
675
676 if (delay) {
677 ktime_t time;
678
679 time = ktime_set(0, 0);
680 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
682 }
683
684 sch->flags &= ~TCQ_F_THROTTLED;
685 netif_schedule(sch->dev);
686 return HRTIMER_NORESTART;
687}
688
689
690#ifdef CONFIG_NET_CLS_POLICE
691
692static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693{
694 int len = skb->len;
695 struct Qdisc *sch = child->__parent;
696 struct cbq_sched_data *q = qdisc_priv(sch);
697 struct cbq_class *cl = q->rx_class;
698
699 q->rx_class = NULL;
700
701 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703 cbq_mark_toplevel(q, cl);
704
705 q->rx_class = cl;
706 cl->q->__parent = sch;
707
708 if (cl->q->enqueue(skb, cl->q) == 0) {
709 sch->q.qlen++;
710 sch->bstats.packets++;
711 sch->bstats.bytes+=len;
712 if (!cl->next_alive)
713 cbq_activate_class(cl);
714 return 0;
715 }
716 sch->qstats.drops++;
717 return 0;
718 }
719
720 sch->qstats.drops++;
721 return -1;
722}
723#endif
724
725/*
726 This is a mission-critical procedure.
727
728 We "regenerate" toplevel cutoff, if transmitting class
729 has backlog and it is not regulated. It is not part of
730 original CBQ description, but looks more reasonable.
731 Probably, it is wrong. This question needs further investigation.
732*/
733
734static __inline__ void
735cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736 struct cbq_class *borrowed)
737{
738 if (cl && q->toplevel >= borrowed->level) {
739 if (cl->q->q.qlen > 1) {
740 do {
741 if (borrowed->undertime == PSCHED_PASTPERFECT) {
742 q->toplevel = borrowed->level;
743 return;
744 }
745 } while ((borrowed=borrowed->borrow) != NULL);
746 }
747#if 0
748 /* It is not necessary now. Uncommenting it
749 will save CPU cycles, but decrease fairness.
750 */
751 q->toplevel = TC_CBQ_MAXLEVEL;
752#endif
753 }
754}
755
756static void
757cbq_update(struct cbq_sched_data *q)
758{
759 struct cbq_class *this = q->tx_class;
760 struct cbq_class *cl = this;
761 int len = q->tx_len;
762
763 q->tx_class = NULL;
764
765 for ( ; cl; cl = cl->share) {
766 long avgidle = cl->avgidle;
767 long idle;
768
769 cl->bstats.packets++;
770 cl->bstats.bytes += len;
771
772 /*
773 (now - last) is total time between packet right edges.
774 (last_pktlen/rate) is "virtual" busy time, so that
775
776 idle = (now - last) - last_pktlen/rate
777 */
778
779 idle = q->now - cl->last;
780 if ((unsigned long)idle > 128*1024*1024) {
781 avgidle = cl->maxidle;
782 } else {
783 idle -= L2T(cl, len);
784
785 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786 where W=2^{-ewma_log}. But cl->avgidle is scaled:
787 cl->avgidle == true_avgidle/W,
788 hence:
789 */
790 avgidle += idle - (avgidle>>cl->ewma_log);
791 }
792
793 if (avgidle <= 0) {
794 /* Overlimit or at-limit */
795
796 if (avgidle < cl->minidle)
797 avgidle = cl->minidle;
798
799 cl->avgidle = avgidle;
800
801 /* Calculate expected time, when this class
802 will be allowed to send.
803 It will occur, when:
804 (1-W)*true_avgidle + W*delay = 0, i.e.
805 idle = (1/W - 1)*(-true_avgidle)
806 or
807 idle = (1 - W)*(-cl->avgidle);
808 */
809 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811 /*
812 That is not all.
813 To maintain the rate allocated to the class,
814 we add to undertime virtual clock,
815 necessary to complete transmitted packet.
816 (len/phys_bandwidth has been already passed
817 to the moment of cbq_update)
818 */
819
820 idle -= L2T(&q->link, len);
821 idle += L2T(cl, len);
822
823 cl->undertime = q->now + idle;
824 } else {
825 /* Underlimit */
826
827 cl->undertime = PSCHED_PASTPERFECT;
828 if (avgidle > cl->maxidle)
829 cl->avgidle = cl->maxidle;
830 else
831 cl->avgidle = avgidle;
832 }
833 cl->last = q->now;
834 }
835
836 cbq_update_toplevel(q, this, q->tx_borrowed);
837}
838
839static __inline__ struct cbq_class *
840cbq_under_limit(struct cbq_class *cl)
841{
842 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843 struct cbq_class *this_cl = cl;
844
845 if (cl->tparent == NULL)
846 return cl;
847
848 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
849 cl->delayed = 0;
850 return cl;
851 }
852
853 do {
854 /* It is very suspicious place. Now overlimit
855 action is generated for not bounded classes
856 only if link is completely congested.
857 Though it is in agreement with the ancestor-only paradigm,
858 it looks very stupid. Particularly,
859 it means that this chunk of code will either
860 never be called or result in strong amplification
861 of burstiness. Dangerous, silly, and yet
862 no other solution exists.
863 */
864 if ((cl = cl->borrow) == NULL) {
865 this_cl->qstats.overlimits++;
866 this_cl->overlimit(this_cl);
867 return NULL;
868 }
869 if (cl->level > q->toplevel)
870 return NULL;
871 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
872
873 cl->delayed = 0;
874 return cl;
875}
876
877static __inline__ struct sk_buff *
878cbq_dequeue_prio(struct Qdisc *sch, int prio)
879{
880 struct cbq_sched_data *q = qdisc_priv(sch);
881 struct cbq_class *cl_tail, *cl_prev, *cl;
882 struct sk_buff *skb;
883 int deficit;
884
885 cl_tail = cl_prev = q->active[prio];
886 cl = cl_prev->next_alive;
887
888 do {
889 deficit = 0;
890
891 /* Start round */
892 do {
893 struct cbq_class *borrow = cl;
894
895 if (cl->q->q.qlen &&
896 (borrow = cbq_under_limit(cl)) == NULL)
897 goto skip_class;
898
899 if (cl->deficit <= 0) {
900 /* Class exhausted its allotment per
901 this round. Switch to the next one.
902 */
903 deficit = 1;
904 cl->deficit += cl->quantum;
905 goto next_class;
906 }
907
908 skb = cl->q->dequeue(cl->q);
909
910 /* Class did not give us any skb :-(
911 It could occur even if cl->q->q.qlen != 0
912 f.e. if cl->q == "tbf"
913 */
914 if (skb == NULL)
915 goto skip_class;
916
917 cl->deficit -= skb->len;
918 q->tx_class = cl;
919 q->tx_borrowed = borrow;
920 if (borrow != cl) {
921#ifndef CBQ_XSTATS_BORROWS_BYTES
922 borrow->xstats.borrows++;
923 cl->xstats.borrows++;
924#else
925 borrow->xstats.borrows += skb->len;
926 cl->xstats.borrows += skb->len;
927#endif
928 }
929 q->tx_len = skb->len;
930
931 if (cl->deficit <= 0) {
932 q->active[prio] = cl;
933 cl = cl->next_alive;
934 cl->deficit += cl->quantum;
935 }
936 return skb;
937
938skip_class:
939 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
940 /* Class is empty or penalized.
941 Unlink it from active chain.
942 */
943 cl_prev->next_alive = cl->next_alive;
944 cl->next_alive = NULL;
945
946 /* Did cl_tail point to it? */
947 if (cl == cl_tail) {
948 /* Repair it! */
949 cl_tail = cl_prev;
950
951 /* Was it the last class in this band? */
952 if (cl == cl_tail) {
953 /* Kill the band! */
954 q->active[prio] = NULL;
955 q->activemask &= ~(1<<prio);
956 if (cl->q->q.qlen)
957 cbq_activate_class(cl);
958 return NULL;
959 }
960
961 q->active[prio] = cl_tail;
962 }
963 if (cl->q->q.qlen)
964 cbq_activate_class(cl);
965
966 cl = cl_prev;
967 }
968
969next_class:
970 cl_prev = cl;
971 cl = cl->next_alive;
972 } while (cl_prev != cl_tail);
973 } while (deficit);
974
975 q->active[prio] = cl_prev;
976
977 return NULL;
978}
979
980static __inline__ struct sk_buff *
981cbq_dequeue_1(struct Qdisc *sch)
982{
983 struct cbq_sched_data *q = qdisc_priv(sch);
984 struct sk_buff *skb;
985 unsigned activemask;
986
987 activemask = q->activemask&0xFF;
988 while (activemask) {
989 int prio = ffz(~activemask);
990 activemask &= ~(1<<prio);
991 skb = cbq_dequeue_prio(sch, prio);
992 if (skb)
993 return skb;
994 }
995 return NULL;
996}
997
998static struct sk_buff *
999cbq_dequeue(struct Qdisc *sch)
1000{
1001 struct sk_buff *skb;
1002 struct cbq_sched_data *q = qdisc_priv(sch);
1003 psched_time_t now;
1004 psched_tdiff_t incr;
1005
1006 now = psched_get_time();
1007 incr = now - q->now_rt;
1008
1009 if (q->tx_class) {
1010 psched_tdiff_t incr2;
1011 /* Time integrator. We calculate EOS time
1012 by adding expected packet transmission time.
1013 If real time is greater, we warp artificial clock,
1014 so that:
1015
1016 cbq_time = max(real_time, work);
1017 */
1018 incr2 = L2T(&q->link, q->tx_len);
1019 q->now += incr2;
1020 cbq_update(q);
1021 if ((incr -= incr2) < 0)
1022 incr = 0;
1023 }
1024 q->now += incr;
1025 q->now_rt = now;
1026
1027 for (;;) {
1028 q->wd_expires = 0;
1029
1030 skb = cbq_dequeue_1(sch);
1031 if (skb) {
1032 sch->q.qlen--;
1033 sch->flags &= ~TCQ_F_THROTTLED;
1034 return skb;
1035 }
1036
1037 /* All the classes are overlimit.
1038
1039 It is possible, if:
1040
1041 1. Scheduler is empty.
1042 2. Toplevel cutoff inhibited borrowing.
1043 3. Root class is overlimit.
1044
1045 Reset the 2nd and 3rd conditions and retry.
1046
1047 Note, that NS and cbq-2.0 are buggy, peeking
1048 an arbitrary class is appropriate for ancestor-only
1049 sharing, but not for toplevel algorithm.
1050
1051 Our version is better, but slower, because it requires
1052 two passes, but it is unavoidable with top-level sharing.
1053 */
1054
1055 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1056 q->link.undertime == PSCHED_PASTPERFECT)
1057 break;
1058
1059 q->toplevel = TC_CBQ_MAXLEVEL;
1060 q->link.undertime = PSCHED_PASTPERFECT;
1061 }
1062
1063 /* No packets in scheduler or nobody wants to give them to us :-(
1064 Sigh... start watchdog timer in the last case. */
1065
1066 if (sch->q.qlen) {
1067 sch->qstats.overlimits++;
1068 if (q->wd_expires)
1069 qdisc_watchdog_schedule(&q->watchdog,
1070 now + q->wd_expires);
1071 }
1072 return NULL;
1073}
1074
1075/* CBQ class maintenance routines */
1076
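/* Recompute 'level' bottom-up from this class to the root: each class
   becomes 1 + the maximum level among its children. */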
1077static void cbq_adjust_levels(struct cbq_class *this)
1078{
1079 if (this == NULL)
1080 return;
1081
1082 do {
1083 int level = 0;
1084 struct cbq_class *cl;
1085
1086 if ((cl = this->children) != NULL) {
1087 do {
1088 if (cl->level > level)
1089 level = cl->level;
1090 } while ((cl = cl->sibling) != this->children);
1091 }
1092 this->level = level+1;
1093 } while ((this = this->tparent) != NULL);
1094}
1095
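/* Rescale the WRR quanta of all classes in one priority band: each quantum
   is made proportional to weight * allot, with q->quanta[prio] holding the
   sum of weights and q->nclasses[prio] the class count for that band. */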
1096static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1097{
1098 struct cbq_class *cl;
1099 unsigned h;
1100
1101 if (q->quanta[prio] == 0)
1102 return;
1103
1104 for (h=0; h<16; h++) {
1105 for (cl = q->classes[h]; cl; cl = cl->next) {
1106 /* BUGGGG... Beware! This expression suffers from
1107 arithmetic overflows!
1108 */
1109 if (cl->priority == prio) {
1110 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1111 q->quanta[prio];
1112 }
1113 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1114 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1115 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1116 }
1117 }
1118 }
1119}
1120
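/* Rebuild the default-class table of this class's split node: clear entries
   that no longer match cl->defmap, then for each still-unset priority choose
   the lowest-level class attached to the same split node whose defmap
   covers it. */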
1121static void cbq_sync_defmap(struct cbq_class *cl)
1122{
1123 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1124 struct cbq_class *split = cl->split;
1125 unsigned h;
1126 int i;
1127
1128 if (split == NULL)
1129 return;
1130
1131 for (i=0; i<=TC_PRIO_MAX; i++) {
1132 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1133 split->defaults[i] = NULL;
1134 }
1135
1136 for (i=0; i<=TC_PRIO_MAX; i++) {
1137 int level = split->level;
1138
1139 if (split->defaults[i])
1140 continue;
1141
1142 for (h=0; h<16; h++) {
1143 struct cbq_class *c;
1144
1145 for (c = q->classes[h]; c; c = c->next) {
1146 if (c->split == split && c->level < level &&
1147 c->defmap&(1<<i)) {
1148 split->defaults[i] = c;
1149 level = c->level;
1150 }
1151 }
1152 }
1153 }
1154}
1155
1156static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1157{
1158 struct cbq_class *split = NULL;
1159
1160 if (splitid == 0) {
1161 if ((split = cl->split) == NULL)
1162 return;
1163 splitid = split->classid;
1164 }
1165
1166 if (split == NULL || split->classid != splitid) {
1167 for (split = cl->tparent; split; split = split->tparent)
1168 if (split->classid == splitid)
1169 break;
1170 }
1171
1172 if (split == NULL)
1173 return;
1174
1175 if (cl->split != split) {
1176 cl->defmap = 0;
1177 cbq_sync_defmap(cl);
1178 cl->split = split;
1179 cl->defmap = def&mask;
1180 } else
1181 cl->defmap = (cl->defmap&~mask)|(def&mask);
1182
1183 cbq_sync_defmap(cl);
1184}
1185
1186static void cbq_unlink_class(struct cbq_class *this)
1187{
1188 struct cbq_class *cl, **clp;
1189 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1190
1191 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1192 if (cl == this) {
1193 *clp = cl->next;
1194 cl->next = NULL;
1195 break;
1196 }
1197 }
1198
1199 if (this->tparent) {
1200 clp=&this->sibling;
1201 cl = *clp;
1202 do {
1203 if (cl == this) {
1204 *clp = cl->sibling;
1205 break;
1206 }
1207 clp = &cl->sibling;
1208 } while ((cl = *clp) != this->sibling);
1209
1210 if (this->tparent->children == this) {
1211 this->tparent->children = this->sibling;
1212 if (this->sibling == this)
1213 this->tparent->children = NULL;
1214 }
1215 } else {
1216 BUG_TRAP(this->sibling == this);
1217 }
1218}
1219
1220static void cbq_link_class(struct cbq_class *this)
1221{
1222 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1223 unsigned h = cbq_hash(this->classid);
1224 struct cbq_class *parent = this->tparent;
1225
1226 this->sibling = this;
1227 this->next = q->classes[h];
1228 q->classes[h] = this;
1229
1230 if (parent == NULL)
1231 return;
1232
1233 if (parent->children == NULL) {
1234 parent->children = this;
1235 } else {
1236 this->sibling = parent->children->sibling;
1237 parent->children->sibling = this;
1238 }
1239}
1240
1241static unsigned int cbq_drop(struct Qdisc* sch)
1242{
1243 struct cbq_sched_data *q = qdisc_priv(sch);
1244 struct cbq_class *cl, *cl_head;
1245 int prio;
1246 unsigned int len;
1247
1248 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1249 if ((cl_head = q->active[prio]) == NULL)
1250 continue;
1251
1252 cl = cl_head;
1253 do {
1254 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1255 sch->q.qlen--;
1256 if (!cl->q->q.qlen)
1257 cbq_deactivate_class(cl);
1258 return len;
1259 }
1260 } while ((cl = cl->next_alive) != cl_head);
1261 }
1262 return 0;
1263}
1264
1265static void
1266cbq_reset(struct Qdisc* sch)
1267{
1268 struct cbq_sched_data *q = qdisc_priv(sch);
1269 struct cbq_class *cl;
1270 int prio;
1271 unsigned h;
1272
1273 q->activemask = 0;
1274 q->pmask = 0;
1275 q->tx_class = NULL;
1276 q->tx_borrowed = NULL;
1277 qdisc_watchdog_cancel(&q->watchdog);
1278 hrtimer_cancel(&q->delay_timer);
1279 q->toplevel = TC_CBQ_MAXLEVEL;
1280 q->now = psched_get_time();
1281 q->now_rt = q->now;
1282
1283 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1284 q->active[prio] = NULL;
1285
1286 for (h = 0; h < 16; h++) {
1287 for (cl = q->classes[h]; cl; cl = cl->next) {
1288 qdisc_reset(cl->q);
1289
1290 cl->next_alive = NULL;
1291 cl->undertime = PSCHED_PASTPERFECT;
1292 cl->avgidle = cl->maxidle;
1293 cl->deficit = cl->quantum;
1294 cl->cpriority = cl->priority;
1295 }
1296 }
1297 sch->q.qlen = 0;
1298}
1299
1300
1301static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1302{
1303 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1304 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1305 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1306 }
1307 if (lss->change&TCF_CBQ_LSS_EWMA)
1308 cl->ewma_log = lss->ewma_log;
1309 if (lss->change&TCF_CBQ_LSS_AVPKT)
1310 cl->avpkt = lss->avpkt;
1311 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1312 cl->minidle = -(long)lss->minidle;
1313 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1314 cl->maxidle = lss->maxidle;
1315 cl->avgidle = lss->maxidle;
1316 }
1317 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1318 cl->offtime = lss->offtime;
1319 return 0;
1320}
1321
1322static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1323{
1324 q->nclasses[cl->priority]--;
1325 q->quanta[cl->priority] -= cl->weight;
1326 cbq_normalize_quanta(q, cl->priority);
1327}
1328
1329static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1330{
1331 q->nclasses[cl->priority]++;
1332 q->quanta[cl->priority] += cl->weight;
1333 cbq_normalize_quanta(q, cl->priority);
1334}
1335
1336static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1337{
1338 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1339
1340 if (wrr->allot)
1341 cl->allot = wrr->allot;
1342 if (wrr->weight)
1343 cl->weight = wrr->weight;
1344 if (wrr->priority) {
1345 cl->priority = wrr->priority-1;
1346 cl->cpriority = cl->priority;
1347 if (cl->priority >= cl->priority2)
1348 cl->priority2 = TC_CBQ_MAXPRIO-1;
1349 }
1350
1351 cbq_addprio(q, cl);
1352 return 0;
1353}
1354
1355static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1356{
1357 switch (ovl->strategy) {
1358 case TC_CBQ_OVL_CLASSIC:
1359 cl->overlimit = cbq_ovl_classic;
1360 break;
1361 case TC_CBQ_OVL_DELAY:
1362 cl->overlimit = cbq_ovl_delay;
1363 break;
1364 case TC_CBQ_OVL_LOWPRIO:
1365 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1366 ovl->priority2-1 <= cl->priority)
1367 return -EINVAL;
1368 cl->priority2 = ovl->priority2-1;
1369 cl->overlimit = cbq_ovl_lowprio;
1370 break;
1371 case TC_CBQ_OVL_DROP:
1372 cl->overlimit = cbq_ovl_drop;
1373 break;
1374 case TC_CBQ_OVL_RCLASSIC:
1375 cl->overlimit = cbq_ovl_rclassic;
1376 break;
1377 default:
1378 return -EINVAL;
1379 }
1380 cl->penalty = ovl->penalty;
1381 return 0;
1382}
1383
1384#ifdef CONFIG_NET_CLS_POLICE
1385static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1386{
1387 cl->police = p->police;
1388
1389 if (cl->q->handle) {
1390 if (p->police == TC_POLICE_RECLASSIFY)
1391 cl->q->reshape_fail = cbq_reshape_fail;
1392 else
1393 cl->q->reshape_fail = NULL;
1394 }
1395 return 0;
1396}
1397#endif
1398
1399static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1400{
1401 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1402 return 0;
1403}
1404
1405static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1406{
1407 struct cbq_sched_data *q = qdisc_priv(sch);
1408 struct rtattr *tb[TCA_CBQ_MAX];
1409 struct tc_ratespec *r;
1410
1411 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1412 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1413 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1414 return -EINVAL;
1415
1416 if (tb[TCA_CBQ_LSSOPT-1] &&
1417 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1418 return -EINVAL;
1419
1420 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1421
1422 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1423 return -EINVAL;
1424
1425 q->link.refcnt = 1;
1426 q->link.sibling = &q->link;
1427 q->link.classid = sch->handle;
1428 q->link.qdisc = sch;
1429 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1430 sch->handle)))
1431 q->link.q = &noop_qdisc;
1432
1433 q->link.priority = TC_CBQ_MAXPRIO-1;
1434 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1435 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1436 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1437 q->link.overlimit = cbq_ovl_classic;
1438 q->link.allot = psched_mtu(sch->dev);
1439 q->link.quantum = q->link.allot;
1440 q->link.weight = q->link.R_tab->rate.rate;
1441
1442 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1443 q->link.avpkt = q->link.allot/2;
1444 q->link.minidle = -0x7FFFFFFF;
1445 q->link.stats_lock = &sch->dev->queue_lock;
1446
1447 qdisc_watchdog_init(&q->watchdog, sch);
1448 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1449 q->delay_timer.function = cbq_undelay;
1450 q->toplevel = TC_CBQ_MAXLEVEL;
1451 q->now = psched_get_time();
1452 q->now_rt = q->now;
1453
1454 cbq_link_class(&q->link);
1455
1456 if (tb[TCA_CBQ_LSSOPT-1])
1457 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1458
1459 cbq_addprio(q, &q->link);
1460 return 0;
1461}
1462
1463static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1464{
1465 unsigned char *b = skb_tail_pointer(skb);
1466
1467 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1468 return skb->len;
1469
1470rtattr_failure:
1471 nlmsg_trim(skb, b);
1472 return -1;
1473}
1474
1475static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1476{
1477 unsigned char *b = skb_tail_pointer(skb);
1478 struct tc_cbq_lssopt opt;
1479
1480 opt.flags = 0;
1481 if (cl->borrow == NULL)
1482 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1483 if (cl->share == NULL)
1484 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1485 opt.ewma_log = cl->ewma_log;
1486 opt.level = cl->level;
1487 opt.avpkt = cl->avpkt;
1488 opt.maxidle = cl->maxidle;
1489 opt.minidle = (u32)(-cl->minidle);
1490 opt.offtime = cl->offtime;
1491 opt.change = ~0;
1492 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1493 return skb->len;
1494
1495rtattr_failure:
1496 nlmsg_trim(skb, b);
1497 return -1;
1498}
1499
1500static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1501{
1502 unsigned char *b = skb_tail_pointer(skb);
1503 struct tc_cbq_wrropt opt;
1504
1505 opt.flags = 0;
1506 opt.allot = cl->allot;
1507 opt.priority = cl->priority+1;
1508 opt.cpriority = cl->cpriority+1;
1509 opt.weight = cl->weight;
1510 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1511 return skb->len;
1512
1513rtattr_failure:
1514 nlmsg_trim(skb, b);
1515 return -1;
1516}
1517
1518static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1519{
1520 unsigned char *b = skb_tail_pointer(skb);
1521 struct tc_cbq_ovl opt;
1522
1523 opt.strategy = cl->ovl_strategy;
1524 opt.priority2 = cl->priority2+1;
1525 opt.pad = 0;
1526 opt.penalty = cl->penalty;
1527 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1528 return skb->len;
1529
1530rtattr_failure:
1531 nlmsg_trim(skb, b);
1532 return -1;
1533}
1534
1535static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1536{
1537 unsigned char *b = skb_tail_pointer(skb);
1538 struct tc_cbq_fopt opt;
1539
1540 if (cl->split || cl->defmap) {
1541 opt.split = cl->split ? cl->split->classid : 0;
1542 opt.defmap = cl->defmap;
1543 opt.defchange = ~0;
1544 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1545 }
1546 return skb->len;
1547
1548rtattr_failure:
1549 nlmsg_trim(skb, b);
1550 return -1;
1551}
1552
1553#ifdef CONFIG_NET_CLS_POLICE
1554static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1555{
1556 unsigned char *b = skb_tail_pointer(skb);
1557 struct tc_cbq_police opt;
1558
1559 if (cl->police) {
1560 opt.police = cl->police;
1561 opt.__res1 = 0;
1562 opt.__res2 = 0;
1563 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1564 }
1565 return skb->len;
1566
1567rtattr_failure:
1568 nlmsg_trim(skb, b);
1569 return -1;
1570}
1571#endif
1572
1573static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1574{
1575 if (cbq_dump_lss(skb, cl) < 0 ||
1576 cbq_dump_rate(skb, cl) < 0 ||
1577 cbq_dump_wrr(skb, cl) < 0 ||
1578 cbq_dump_ovl(skb, cl) < 0 ||
1579#ifdef CONFIG_NET_CLS_POLICE
1580 cbq_dump_police(skb, cl) < 0 ||
1581#endif
1582 cbq_dump_fopt(skb, cl) < 0)
1583 return -1;
1584 return 0;
1585}
1586
1587static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1588{
1589 struct cbq_sched_data *q = qdisc_priv(sch);
1590 unsigned char *b = skb_tail_pointer(skb);
1591 struct rtattr *rta;
1592
1593 rta = (struct rtattr*)b;
1594 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1595 if (cbq_dump_attr(skb, &q->link) < 0)
1596 goto rtattr_failure;
1597 rta->rta_len = skb_tail_pointer(skb) - b;
1598 return skb->len;
1599
1600rtattr_failure:
1601 nlmsg_trim(skb, b);
1602 return -1;
1603}
1604
1605static int
1606cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1607{
1608 struct cbq_sched_data *q = qdisc_priv(sch);
1609
1610 q->link.xstats.avgidle = q->link.avgidle;
1611 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1612}
1613
1614static int
1615cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1616 struct sk_buff *skb, struct tcmsg *tcm)
1617{
1618 struct cbq_class *cl = (struct cbq_class*)arg;
1619 unsigned char *b = skb_tail_pointer(skb);
1620 struct rtattr *rta;
1621
1622 if (cl->tparent)
1623 tcm->tcm_parent = cl->tparent->classid;
1624 else
1625 tcm->tcm_parent = TC_H_ROOT;
1626 tcm->tcm_handle = cl->classid;
1627 tcm->tcm_info = cl->q->handle;
1628
1629 rta = (struct rtattr*)b;
1630 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1631 if (cbq_dump_attr(skb, cl) < 0)
1632 goto rtattr_failure;
1633 rta->rta_len = skb_tail_pointer(skb) - b;
1634 return skb->len;
1635
1636rtattr_failure:
1637 nlmsg_trim(skb, b);
1638 return -1;
1639}
1640
1641static int
1642cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1643 struct gnet_dump *d)
1644{
1645 struct cbq_sched_data *q = qdisc_priv(sch);
1646 struct cbq_class *cl = (struct cbq_class*)arg;
1647
1648 cl->qstats.qlen = cl->q->q.qlen;
1649 cl->xstats.avgidle = cl->avgidle;
1650 cl->xstats.undertime = 0;
1651
1652 if (cl->undertime != PSCHED_PASTPERFECT)
1653 cl->xstats.undertime = cl->undertime - q->now;
1654
1655 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1656#ifdef CONFIG_NET_ESTIMATOR
1657 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1658#endif
1659 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1660 return -1;
1661
1662 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1663}
1664
1665static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1666 struct Qdisc **old)
1667{
1668 struct cbq_class *cl = (struct cbq_class*)arg;
1669
1670 if (cl) {
1671 if (new == NULL) {
1672 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1673 cl->classid)) == NULL)
1674 return -ENOBUFS;
1675 } else {
1676#ifdef CONFIG_NET_CLS_POLICE
1677 if (cl->police == TC_POLICE_RECLASSIFY)
1678 new->reshape_fail = cbq_reshape_fail;
1679#endif
1680 }
1681 sch_tree_lock(sch);
1682 *old = xchg(&cl->q, new);
1683 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1684 qdisc_reset(*old);
1685 sch_tree_unlock(sch);
1686
1687 return 0;
1688 }
1689 return -ENOENT;
1690}
1691
1692static struct Qdisc *
1693cbq_leaf(struct Qdisc *sch, unsigned long arg)
1694{
1695 struct cbq_class *cl = (struct cbq_class*)arg;
1696
1697 return cl ? cl->q : NULL;
1698}
1699
1700static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1701{
1702 struct cbq_class *cl = (struct cbq_class *)arg;
1703
1704 if (cl->q->q.qlen == 0)
1705 cbq_deactivate_class(cl);
1706}
1707
1708static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1709{
1710 struct cbq_sched_data *q = qdisc_priv(sch);
1711 struct cbq_class *cl = cbq_class_lookup(q, classid);
1712
1713 if (cl) {
1714 cl->refcnt++;
1715 return (unsigned long)cl;
1716 }
1717 return 0;
1718}
1719
1720static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1721{
1722 struct cbq_sched_data *q = qdisc_priv(sch);
1723
1724 BUG_TRAP(!cl->filters);
1725
1726 tcf_destroy_chain(cl->filter_list);
1727 qdisc_destroy(cl->q);
1728 qdisc_put_rtab(cl->R_tab);
1729#ifdef CONFIG_NET_ESTIMATOR
1730 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1731#endif
1732 if (cl != &q->link)
1733 kfree(cl);
1734}
1735
1736static void
1737cbq_destroy(struct Qdisc* sch)
1738{
1739 struct cbq_sched_data *q = qdisc_priv(sch);
1740 struct cbq_class *cl;
1741 unsigned h;
1742
1743#ifdef CONFIG_NET_CLS_POLICE
1744 q->rx_class = NULL;
1745#endif
1746 /*
1747 * Filters must be destroyed first because we don't destroy the
1748 * classes from root to leafs which means that filters can still
1749 * be bound to classes which have been destroyed already. --TGR '04
1750 */
1751 for (h = 0; h < 16; h++) {
1752 for (cl = q->classes[h]; cl; cl = cl->next) {
1753 tcf_destroy_chain(cl->filter_list);
1754 cl->filter_list = NULL;
1755 }
1756 }
1757 for (h = 0; h < 16; h++) {
1758 struct cbq_class *next;
1759
1760 for (cl = q->classes[h]; cl; cl = next) {
1761 next = cl->next;
1762 cbq_destroy_class(sch, cl);
1763 }
1764 }
1765}
1766
1767static void cbq_put(struct Qdisc *sch, unsigned long arg)
1768{
1769 struct cbq_class *cl = (struct cbq_class*)arg;
1770
1771 if (--cl->refcnt == 0) {
1772#ifdef CONFIG_NET_CLS_POLICE
1773 struct cbq_sched_data *q = qdisc_priv(sch);
1774
1775 spin_lock_bh(&sch->dev->queue_lock);
1776 if (q->rx_class == cl)
1777 q->rx_class = NULL;
1778 spin_unlock_bh(&sch->dev->queue_lock);
1779#endif
1780
1781 cbq_destroy_class(sch, cl);
1782 }
1783}
1784
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      cl->stats_lock, tca[TCA_RATE-1]);
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  cl->stats_lock, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

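/*
 * Delete a class.  Refused for the root class and for classes that
 * still have children or bound filters.  The class's queue is purged
 * (and the qlen change propagated up the tree), it is unlinked from
 * the hash and the active/priority lists, and it is destroyed once the
 * last reference is dropped.
 */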
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

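/* Return the anchor of a class's filter list (the root class when arg is 0). */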
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

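/*
 * Bind a classifier to a class.  The target must lie strictly below the
 * filter's parent in the hierarchy (p->level > cl->level); the filter
 * count keeps the class from being deleted while filters are still
 * attached (see cbq_delete()).
 */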
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

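/*
 * Walk all classes in the 16-bucket hash, honouring the walker's
 * skip/count/stop protocol.
 */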
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");