/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	Algorithm skeleton was taken from NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed,
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note, that the statement of [3] is
	incomplete, delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

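	An illustrative worked example of the bound above (the numbers are
	purely hypothetical, not taken from the references): with
	MTU = 1500 bytes, B = 12.5 Mbyte/s (a 100 Mbit link), k = 2 classes
	and per-round allotments W*r_1 = W*r_2 = 500 bytes (so W*r = 1000
	bytes), we get [MTU/(W*r_1)] = 3 and therefore
	delay_1 <= (3*1000 + 1000 + 2*1500)/B = 7000 bytes / B ~= 560 usec.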

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me, why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. Workaround is to consider the next dequeue event
	as sign that previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with clock integrator, this scheme looks
	very close to an ideal solution.  */

struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


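/* Descriptive note (added for readability): L2T converts a packet length
 * into transmission time on this class's configured rate by indexing the
 * precomputed rate table at (len >> cell_log). The table itself is built
 * by userspace tc and looked up via qdisc_get_rtab() below.
 */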
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])


static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (f.e. route-based) at the root
   of CBQ so that they resolve to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
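/* Rough sketch of the common case: when a filter on the current head
 * returns a bare priority (TC_H_MAJ(res.classid) == 0), the packet is
 * mapped through head->defaults[res.classid & TC_PRIO_MAX], falling back
 * to defaults[TC_PRIO_BESTEFFORT]. If classification fails entirely, the
 * fallback path below tries skb->priority against the defaults and
 * otherwise returns the head (split) class itself.
 */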

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#elif defined(CONFIG_NET_CLS_POLICE)
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		case TC_POLICE_SHOT:
			return NULL;
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	   apply its agency-specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int ret;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret == NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		sch->bstats.packets++;
		sch->bstats.bytes+=len;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	sch->qstats.drops++;
	cbq_mark_toplevel(q, cl);
	cl->qstats.drops++;
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->qstats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->qstats.drops++;
	cl->qstats.drops++;
	return ret;
}

/* Overlimit actions */

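/* Roughly: when a class is overlimit and may not borrow, the strategy
 * configured via TCA_CBQ_OVL_STRATEGY is invoked. "classic" computes an
 * extra offtime-based sleep, "delay" parks the class on the penalty
 * hrtimer, "lowprio" demotes it to priority2, and "drop" discards one
 * queued packet; all but "delay" fall back to the classic path, which
 * also sets q->wd_expires so the watchdog can reschedule dequeueing.
 */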
/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here: apparently they forgot
		   to shift the value by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   real available rate, rather than leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
						  expires)) > 0)
				q->delay_timer.expires = expires;
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
	return HRTIMER_NORESTART;
}


#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->bstats.packets++;
			sch->bstats.bytes+=len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but it looks more reasonable.
   It may well be wrong; this question needs further investigation.
*/
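/* Note (added for readability): q->toplevel caps how far up the tree
 * borrowing may reach; cbq_under_limit() refuses to borrow from an
 * ancestor whose level exceeds it, and TC_CBQ_MAXLEVEL means the cutoff
 * is effectively disabled.
 */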

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is total time between packet right edges.
		   (last_pktlen/rate) is "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
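			/* Illustrative numbers (not from the original code):
			   with ewma_log = 5 (W = 1/32), a scaled avgidle of
			   3200 (true value 100) and a new idle sample of 50
			   give 3200 + 50 - 100 = 3150, i.e. a true average of
			   3150/32 ~= 98.4, matching (1-1/32)*100 + (1/32)*50.
			 */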
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate expected time, when this class
			   will be allowed to send.
			   It will occur, when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime virtual clock,
			   necessary to complete transmitted packet.
			   (len/phys_bandwidth has been already passed
			   to the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently an overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and yet
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   It could occur even if cl->q->q.qlen != 0
			   f.e. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding expected packet transmission time.
		   If real time is greater, we warp artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
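		/* Put differently (explanatory note): q->now advances by at
		   least the link-level transmission time of the last sent
		   packet, even if the wall clock has not moved that far;
		   any remaining real-time difference is added just below. */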
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible, if:

		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note, that NS and cbq-2.0 are buggy, peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for toplevel algorithm.

		   Our version is better, but slower, because it requires
		   two passes, but it is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

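	/* Keep per-round quanta proportional to weight, roughly
	   quantum_i = allot_i * weight_i * nclasses[prio] / sum(weights[prio]).
	   Hypothetical numbers for illustration: two classes with allot 1514
	   and weights 1 and 3 get quanta of about 757 and 2271 bytes. */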
	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
		return -EINVAL;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
					    sch->handle)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct rtattr *rta;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb_tail_pointer(skb) - b;
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb_tail_pointer(skb);
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb_tail_pointer(skb) - b;
	return skb->len;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
						     cl->classid)) == NULL)
				return -ENOBUFS;
		} else {
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = xchg(&cl->q, new);
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);

	tcf_destroy_chain(cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned h;

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have already been destroyed. --TGR '04
	 */
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001732 for (h = 0; h < 16; h++) {
1733 for (cl = q->classes[h]; cl; cl = cl->next) {
Patrick McHardya48b5a62007-03-23 11:29:43 -07001734 tcf_destroy_chain(cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001735 cl->filter_list = NULL;
1736 }
1737 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 for (h = 0; h < 16; h++) {
1739 struct cbq_class *next;
1740
1741 for (cl = q->classes[h]; cl; cl = next) {
1742 next = cl->next;
1743 cbq_destroy_class(sch, cl);
1744 }
1745 }
1746}
1747
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}

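/*
 * Create a new class or change the parameters of an existing one.  On
 * entry *arg holds the class to modify (0 when creating); on successful
 * creation it is set to the new class.  Roughly, a command such as
 *
 *   tc class add dev eth0 parent 1: classid 1:10 cbq bandwidth 100Mbit \
 *      rate 10Mbit allot 1514 prio 5 avpkt 1000 bounded
 *
 * (illustrative values only) ends up here with the rate, WRR and LSS
 * attributes filled in by the tc userspace tool.
 */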
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		if (tca[TCA_RATE-1])
			gen_replace_estimator(&cl->bstats, &cl->rate_est,
					      &sch->dev->queue_lock,
					      tca[TCA_RATE-1]);
		return 0;
	}

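	/*
	 * No existing class was given: create a new one.  The rate, WRR and
	 * LSS parameters are mandatory for creation.
	 */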
	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

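	/*
	 * Either validate the classid supplied by the caller or auto-generate
	 * an unused minor id (0x8001..0xFFFF) under this qdisc's handle.
	 */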
	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
				  &sch->dev->queue_lock, tca[TCA_RATE-1]);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

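/*
 * Delete a class.  Refuse while filters are still bound to it, while it
 * still has children, or if it is the built-in root class; otherwise purge
 * its queue, unlink it from the tree and drop the creation reference.
 */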
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

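/* Return the filter list anchor for a class (the root class if arg is 0). */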
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

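/*
 * Bind/unbind a tc filter to a class.  The filters counter keeps the class
 * from being deleted while filters still point at it; binding is refused
 * unless the target class sits at a strictly lower level than the filter's
 * parent class.
 */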
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

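/*
 * Walk every class in the 16-bucket hash, invoking the caller's callback
 * and honouring the skip/count/stop cursor in struct qdisc_walker.
 */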
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct Qdisc_class_ops cbq_class_ops = {
	.graft		= cbq_graft,
	.leaf		= cbq_leaf,
	.qlen_notify	= cbq_qlen_notify,
	.get		= cbq_get,
	.put		= cbq_put,
	.change		= cbq_change_class,
	.delete		= cbq_delete,
	.walk		= cbq_walk,
	.tcf_chain	= cbq_find_tcf,
	.bind_tcf	= cbq_bind_filter,
	.unbind_tcf	= cbq_unbind_filter,
	.dump		= cbq_dump_class,
	.dump_stats	= cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops = {
	.next		= NULL,
	.cl_ops		= &cbq_class_ops,
	.id		= "cbq",
	.priv_size	= sizeof(struct cbq_sched_data),
	.enqueue	= cbq_enqueue,
	.dequeue	= cbq_dequeue,
	.requeue	= cbq_requeue,
	.drop		= cbq_drop,
	.init		= cbq_init,
	.reset		= cbq_reset,
	.destroy	= cbq_destroy,
	.change		= NULL,
	.dump		= cbq_dump,
	.dump_stats	= cbq_dump_stats,
	.owner		= THIS_MODULE,
};

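/*
 * Module glue: register the qdisc with the packet scheduler core so that,
 * for example, "tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit
 * avpkt 1000" (illustrative values only) can instantiate it.
 */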
static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");