blob: 5b132c473264eb4b428607f524c87d17032b149c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/types.h>
15#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/skbuff.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070019#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <net/pkt_sched.h>
21
22
23/* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090027 Management Models for Packet Networks",
Linus Torvalds1da177e2005-04-16 15:20:36 -070028 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090030 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090032 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
Linus Torvalds1da177e2005-04-16 15:20:36 -070033 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
44
45 --- The WRR algorithm is different. Our version looks more
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090046 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note, that the statement of [3] is
49 incomplete, delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
63
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
70
71struct cbq_sched_data;
72
73
74struct cbq_class
75{
Patrick McHardyd77fea22008-07-05 23:22:05 -070076 struct Qdisc_class_common common;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -070084#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 unsigned char police;
86#endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
Patrick McHardy1a13cb62007-03-16 01:22:20 -070099 psched_tdiff_t penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118/* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700130 psched_time_t penalized;
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000131 struct gnet_stats_basic_packed bstats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct tc_cbq_xstats xstats;
135
136 struct tcf_proto *filter_list;
137
138 int refcnt;
139 int filters;
140
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
142};
143
144struct cbq_sched_data
145{
Patrick McHardyd77fea22008-07-05 23:22:05 -0700146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
149
150 struct cbq_class link;
151
152 unsigned activemask;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
154 with backlog */
155
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700156#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 struct cbq_class *rx_class;
158#endif
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
161 int tx_len;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
164 unsigned pmask;
165
David S. Miller2fbd3da2009-09-01 17:59:25 -0700166 struct hrtimer delay_timer;
Patrick McHardy88a99352007-03-16 01:21:11 -0700167 struct qdisc_watchdog watchdog; /* Watchdog timer,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 started when CBQ has
169 backlog, but cannot
170 transmit just now */
Patrick McHardy88a99352007-03-16 01:21:11 -0700171 psched_tdiff_t wd_expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 int toplevel;
173 u32 hgenerator;
174};
175
176
Jesper Dangaard Brouere9bef552007-09-12 16:35:24 +0200177#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179static __inline__ struct cbq_class *
180cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
181{
Patrick McHardyd77fea22008-07-05 23:22:05 -0700182 struct Qdisc_class_common *clc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
Patrick McHardyd77fea22008-07-05 23:22:05 -0700184 clc = qdisc_class_find(&q->clhash, classid);
185 if (clc == NULL)
186 return NULL;
187 return container_of(clc, struct cbq_class, common);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188}
189
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700190#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
192static struct cbq_class *
193cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
194{
195 struct cbq_class *cl, *new;
196
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
199 return new;
200
201 return NULL;
202}
203
204#endif
205
206/* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
208 transparently.
209
210 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
213 to the split node.
214 */
215
216static struct cbq_class *
217cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218{
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
225
226 /*
227 * Step 1. If skb->priority points to one of our classes, use it.
228 */
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
231 return cl;
232
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 for (;;) {
235 int result = 0;
236 defmap = head->defaults;
237
238 /*
239 * Step 2+n. Apply classifier.
240 */
Patrick McHardy73ca4912007-07-15 00:02:31 -0700241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 goto fallback;
244
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
250
251 if (cl == NULL || cl->level >= head->level)
252 goto fallback;
253 }
254
255#ifdef CONFIG_NET_CLS_ACT
256 switch (result) {
257 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900258 case TC_ACT_STOLEN:
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 case TC_ACT_SHOT:
261 return NULL;
Patrick McHardy73ca4912007-07-15 00:02:31 -0700262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265#endif
266 if (cl->level == 0)
267 return cl;
268
269 /*
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply agency specific classifier.
272 * Repeat this procdure until we hit a leaf node.
273 */
274 head = cl;
275 }
276
277fallback:
278 cl = head;
279
280 /*
281 * Step 4. No success...
282 */
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
286 return head;
287
288 return cl;
289}
290
291/*
292 A packet has just been enqueued on the empty class.
293 cbq_activate_class adds it to the tail of active class list
294 of its priority band.
295 */
296
297static __inline__ void cbq_activate_class(struct cbq_class *cl)
298{
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
302
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
305
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
309 } else {
310 cl->next_alive = cl;
311 q->activemask |= (1<<prio);
312 }
313}
314
315/*
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
319 */
320
321static void cbq_deactivate_class(struct cbq_class *this)
322{
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
327
328 do {
329 cl = cl_prev->next_alive;
330 if (cl == this) {
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
333
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
339 return;
340 }
341 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 return;
343 }
344 } while ((cl_prev = cl) != q->active[prio]);
345}
346
347static void
348cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
349{
350 int toplevel = q->toplevel;
351
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
353 psched_time_t now;
354 psched_tdiff_t incr;
355
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700356 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700357 incr = now - q->now_rt;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700358 now = q->now + incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359
360 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700361 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 q->toplevel = cl->level;
363 return;
364 }
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
366 }
367}
368
369static int
370cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
371{
372 struct cbq_sched_data *q = qdisc_priv(sch);
Satyam Sharmaddeee3c2007-09-16 14:54:05 -0700373 int uninitialized_var(ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
375
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700376#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 q->rx_class = cl;
378#endif
379 if (cl == NULL) {
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700380 if (ret & __NET_XMIT_BYPASS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 sch->qstats.drops++;
382 kfree_skb(skb);
383 return ret;
384 }
385
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700386#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 cl->q->__parent = sch;
388#endif
Jussi Kivilinna5f861732008-07-20 00:08:04 -0700389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 sch->q.qlen++;
392 sch->bstats.packets++;
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700393 sch->bstats.bytes += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 cbq_mark_toplevel(q, cl);
395 if (!cl->next_alive)
396 cbq_activate_class(cl);
397 return ret;
398 }
399
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700400 if (net_xmit_drop_count(ret)) {
401 sch->qstats.drops++;
402 cbq_mark_toplevel(q, cl);
403 cl->qstats.drops++;
404 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 return ret;
406}
407
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408/* Overlimit actions */
409
410/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
411
412static void cbq_ovl_classic(struct cbq_class *cl)
413{
414 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700415 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
417 if (!cl->delayed) {
418 delay += cl->offtime;
419
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900420 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 Class goes to sleep, so that it will have no
422 chance to work avgidle. Let's forgive it 8)
423
424 BTW cbq-2.0 has a crap in this
425 place, apparently they forgot to shift it by cl->ewma_log.
426 */
427 if (cl->avgidle < 0)
428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
429 if (cl->avgidle < cl->minidle)
430 cl->avgidle = cl->minidle;
431 if (delay <= 0)
432 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700433 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
435 cl->xstats.overactions++;
436 cl->delayed = 1;
437 }
438 if (q->wd_expires == 0 || q->wd_expires > delay)
439 q->wd_expires = delay;
440
441 /* Dirty work! We must schedule wakeups based on
442 real available rate, rather than leaf rate,
443 which may be tiny (even zero).
444 */
445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
446 struct cbq_class *b;
447 psched_tdiff_t base_delay = q->wd_expires;
448
449 for (b = cl->borrow; b; b = b->borrow) {
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700450 delay = b->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 if (delay < base_delay) {
452 if (delay <= 0)
453 delay = 1;
454 base_delay = delay;
455 }
456 }
457
458 q->wd_expires = base_delay;
459 }
460}
461
462/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
463 they go overlimit
464 */
465
466static void cbq_ovl_rclassic(struct cbq_class *cl)
467{
468 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
469 struct cbq_class *this = cl;
470
471 do {
472 if (cl->level > q->toplevel) {
473 cl = NULL;
474 break;
475 }
476 } while ((cl = cl->borrow) != NULL);
477
478 if (cl == NULL)
479 cl = this;
480 cbq_ovl_classic(cl);
481}
482
483/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
484
485static void cbq_ovl_delay(struct cbq_class *cl)
486{
487 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700488 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489
Jarek Poplawski2540e052008-08-21 05:11:14 -0700490 if (test_bit(__QDISC_STATE_DEACTIVATED,
491 &qdisc_root_sleeping(cl->qdisc)->state))
492 return;
493
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700495 psched_time_t sched = q->now;
496 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497
498 delay += cl->offtime;
499 if (cl->avgidle < 0)
500 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
501 if (cl->avgidle < cl->minidle)
502 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700503 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
505 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700506 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 cl->penalized = sched;
508 cl->cpriority = TC_CBQ_MAXPRIO;
509 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700510
511 expires = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700512 expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700513 if (hrtimer_try_to_cancel(&q->delay_timer) &&
514 ktime_to_ns(ktime_sub(
515 hrtimer_get_expires(&q->delay_timer),
516 expires)) > 0)
517 hrtimer_set_expires(&q->delay_timer, expires);
518 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 cl->delayed = 1;
520 cl->xstats.overactions++;
521 return;
522 }
523 delay = 1;
524 }
525 if (q->wd_expires == 0 || q->wd_expires > delay)
526 q->wd_expires = delay;
527}
528
529/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
530
531static void cbq_ovl_lowprio(struct cbq_class *cl)
532{
533 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
534
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700535 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
537 if (cl->cpriority != cl->priority2) {
538 cl->cpriority = cl->priority2;
539 q->pmask |= (1<<cl->cpriority);
540 cl->xstats.overactions++;
541 }
542 cbq_ovl_classic(cl);
543}
544
545/* TC_CBQ_OVL_DROP: penalize class by dropping */
546
547static void cbq_ovl_drop(struct cbq_class *cl)
548{
549 if (cl->q->ops->drop)
550 if (cl->q->ops->drop(cl->q))
551 cl->qdisc->q.qlen--;
552 cl->xstats.overactions++;
553 cbq_ovl_classic(cl);
554}
555
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700556static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
557 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558{
559 struct cbq_class *cl;
560 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700561 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
563 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700564 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565
566 do {
567 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700568 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 cl_prev->next_alive = cl->next_alive;
570 cl->next_alive = NULL;
571 cl->cpriority = cl->priority;
572 cl->delayed = 0;
573 cbq_activate_class(cl);
574
575 if (cl == q->active[prio]) {
576 q->active[prio] = cl_prev;
577 if (cl == q->active[prio]) {
578 q->active[prio] = NULL;
579 return 0;
580 }
581 }
582
583 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700584 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 sched = cl->penalized;
586 } while ((cl_prev = cl) != q->active[prio]);
587
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700588 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589}
590
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700591static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700593 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700594 delay_timer);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700595 struct Qdisc *sch = q->watchdog.qdisc;
596 psched_time_t now;
597 psched_tdiff_t delay = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 unsigned pmask;
599
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700600 now = psched_get_time();
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700601
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 pmask = q->pmask;
603 q->pmask = 0;
604
605 while (pmask) {
606 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700607 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 pmask &= ~(1<<prio);
610
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700611 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 if (tmp > 0) {
613 q->pmask |= 1<<prio;
614 if (tmp < delay || delay == 0)
615 delay = tmp;
616 }
617 }
618
619 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700620 ktime_t time;
621
622 time = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700623 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 }
626
627 sch->flags &= ~TCQ_F_THROTTLED;
David S. Miller8608db02008-08-18 20:51:18 -0700628 __netif_schedule(qdisc_root(sch));
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700629 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630}
631
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700632#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
634{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 struct Qdisc *sch = child->__parent;
636 struct cbq_sched_data *q = qdisc_priv(sch);
637 struct cbq_class *cl = q->rx_class;
638
639 q->rx_class = NULL;
640
641 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700642 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
644 cbq_mark_toplevel(q, cl);
645
646 q->rx_class = cl;
647 cl->q->__parent = sch;
648
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700649 ret = qdisc_enqueue(skb, cl->q);
650 if (ret == NET_XMIT_SUCCESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 sch->q.qlen++;
652 sch->bstats.packets++;
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700653 sch->bstats.bytes += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 if (!cl->next_alive)
655 cbq_activate_class(cl);
656 return 0;
657 }
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700658 if (net_xmit_drop_count(ret))
659 sch->qstats.drops++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 return 0;
661 }
662
663 sch->qstats.drops++;
664 return -1;
665}
666#endif
667
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900668/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 It is mission critical procedure.
670
671 We "regenerate" toplevel cutoff, if transmitting class
672 has backlog and it is not regulated. It is not part of
673 original CBQ description, but looks more reasonable.
674 Probably, it is wrong. This question needs further investigation.
675*/
676
677static __inline__ void
678cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
679 struct cbq_class *borrowed)
680{
681 if (cl && q->toplevel >= borrowed->level) {
682 if (cl->q->q.qlen > 1) {
683 do {
Patrick McHardya0849802007-03-23 11:28:30 -0700684 if (borrowed->undertime == PSCHED_PASTPERFECT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 q->toplevel = borrowed->level;
686 return;
687 }
688 } while ((borrowed=borrowed->borrow) != NULL);
689 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900690#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 /* It is not necessary now. Uncommenting it
692 will save CPU cycles, but decrease fairness.
693 */
694 q->toplevel = TC_CBQ_MAXLEVEL;
695#endif
696 }
697}
698
699static void
700cbq_update(struct cbq_sched_data *q)
701{
702 struct cbq_class *this = q->tx_class;
703 struct cbq_class *cl = this;
704 int len = q->tx_len;
705
706 q->tx_class = NULL;
707
708 for ( ; cl; cl = cl->share) {
709 long avgidle = cl->avgidle;
710 long idle;
711
712 cl->bstats.packets++;
713 cl->bstats.bytes += len;
714
715 /*
716 (now - last) is total time between packet right edges.
717 (last_pktlen/rate) is "virtual" busy time, so that
718
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900719 idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 */
721
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700722 idle = q->now - cl->last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 if ((unsigned long)idle > 128*1024*1024) {
724 avgidle = cl->maxidle;
725 } else {
726 idle -= L2T(cl, len);
727
728 /* true_avgidle := (1-W)*true_avgidle + W*idle,
729 where W=2^{-ewma_log}. But cl->avgidle is scaled:
730 cl->avgidle == true_avgidle/W,
731 hence:
732 */
733 avgidle += idle - (avgidle>>cl->ewma_log);
734 }
735
736 if (avgidle <= 0) {
737 /* Overlimit or at-limit */
738
739 if (avgidle < cl->minidle)
740 avgidle = cl->minidle;
741
742 cl->avgidle = avgidle;
743
744 /* Calculate expected time, when this class
745 will be allowed to send.
746 It will occur, when:
747 (1-W)*true_avgidle + W*delay = 0, i.e.
748 idle = (1/W - 1)*(-true_avgidle)
749 or
750 idle = (1 - W)*(-cl->avgidle);
751 */
752 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
753
754 /*
755 That is not all.
756 To maintain the rate allocated to the class,
757 we add to undertime virtual clock,
758 necessary to complete transmitted packet.
759 (len/phys_bandwidth has been already passed
760 to the moment of cbq_update)
761 */
762
763 idle -= L2T(&q->link, len);
764 idle += L2T(cl, len);
765
Patrick McHardy7c59e252007-03-23 11:27:45 -0700766 cl->undertime = q->now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 } else {
768 /* Underlimit */
769
Patrick McHardya0849802007-03-23 11:28:30 -0700770 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 if (avgidle > cl->maxidle)
772 cl->avgidle = cl->maxidle;
773 else
774 cl->avgidle = avgidle;
775 }
776 cl->last = q->now;
777 }
778
779 cbq_update_toplevel(q, this, q->tx_borrowed);
780}
781
782static __inline__ struct cbq_class *
783cbq_under_limit(struct cbq_class *cl)
784{
785 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
786 struct cbq_class *this_cl = cl;
787
788 if (cl->tparent == NULL)
789 return cl;
790
Patrick McHardya0849802007-03-23 11:28:30 -0700791 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 cl->delayed = 0;
793 return cl;
794 }
795
796 do {
797 /* It is very suspicious place. Now overlimit
798 action is generated for not bounded classes
799 only if link is completely congested.
800 Though it is in agree with ancestor-only paradigm,
801 it looks very stupid. Particularly,
802 it means that this chunk of code will either
803 never be called or result in strong amplification
804 of burstiness. Dangerous, silly, and, however,
805 no another solution exists.
806 */
807 if ((cl = cl->borrow) == NULL) {
808 this_cl->qstats.overlimits++;
809 this_cl->overlimit(this_cl);
810 return NULL;
811 }
812 if (cl->level > q->toplevel)
813 return NULL;
Patrick McHardya0849802007-03-23 11:28:30 -0700814 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
816 cl->delayed = 0;
817 return cl;
818}
819
820static __inline__ struct sk_buff *
821cbq_dequeue_prio(struct Qdisc *sch, int prio)
822{
823 struct cbq_sched_data *q = qdisc_priv(sch);
824 struct cbq_class *cl_tail, *cl_prev, *cl;
825 struct sk_buff *skb;
826 int deficit;
827
828 cl_tail = cl_prev = q->active[prio];
829 cl = cl_prev->next_alive;
830
831 do {
832 deficit = 0;
833
834 /* Start round */
835 do {
836 struct cbq_class *borrow = cl;
837
838 if (cl->q->q.qlen &&
839 (borrow = cbq_under_limit(cl)) == NULL)
840 goto skip_class;
841
842 if (cl->deficit <= 0) {
843 /* Class exhausted its allotment per
844 this round. Switch to the next one.
845 */
846 deficit = 1;
847 cl->deficit += cl->quantum;
848 goto next_class;
849 }
850
851 skb = cl->q->dequeue(cl->q);
852
853 /* Class did not give us any skb :-(
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900854 It could occur even if cl->q->q.qlen != 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 f.e. if cl->q == "tbf"
856 */
857 if (skb == NULL)
858 goto skip_class;
859
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700860 cl->deficit -= qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 q->tx_class = cl;
862 q->tx_borrowed = borrow;
863 if (borrow != cl) {
864#ifndef CBQ_XSTATS_BORROWS_BYTES
865 borrow->xstats.borrows++;
866 cl->xstats.borrows++;
867#else
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700868 borrow->xstats.borrows += qdisc_pkt_len(skb);
869 cl->xstats.borrows += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870#endif
871 }
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700872 q->tx_len = qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873
874 if (cl->deficit <= 0) {
875 q->active[prio] = cl;
876 cl = cl->next_alive;
877 cl->deficit += cl->quantum;
878 }
879 return skb;
880
881skip_class:
882 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
883 /* Class is empty or penalized.
884 Unlink it from active chain.
885 */
886 cl_prev->next_alive = cl->next_alive;
887 cl->next_alive = NULL;
888
889 /* Did cl_tail point to it? */
890 if (cl == cl_tail) {
891 /* Repair it! */
892 cl_tail = cl_prev;
893
894 /* Was it the last class in this band? */
895 if (cl == cl_tail) {
896 /* Kill the band! */
897 q->active[prio] = NULL;
898 q->activemask &= ~(1<<prio);
899 if (cl->q->q.qlen)
900 cbq_activate_class(cl);
901 return NULL;
902 }
903
904 q->active[prio] = cl_tail;
905 }
906 if (cl->q->q.qlen)
907 cbq_activate_class(cl);
908
909 cl = cl_prev;
910 }
911
912next_class:
913 cl_prev = cl;
914 cl = cl->next_alive;
915 } while (cl_prev != cl_tail);
916 } while (deficit);
917
918 q->active[prio] = cl_prev;
919
920 return NULL;
921}
922
923static __inline__ struct sk_buff *
924cbq_dequeue_1(struct Qdisc *sch)
925{
926 struct cbq_sched_data *q = qdisc_priv(sch);
927 struct sk_buff *skb;
928 unsigned activemask;
929
930 activemask = q->activemask&0xFF;
931 while (activemask) {
932 int prio = ffz(~activemask);
933 activemask &= ~(1<<prio);
934 skb = cbq_dequeue_prio(sch, prio);
935 if (skb)
936 return skb;
937 }
938 return NULL;
939}
940
941static struct sk_buff *
942cbq_dequeue(struct Qdisc *sch)
943{
944 struct sk_buff *skb;
945 struct cbq_sched_data *q = qdisc_priv(sch);
946 psched_time_t now;
947 psched_tdiff_t incr;
948
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700949 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700950 incr = now - q->now_rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
952 if (q->tx_class) {
953 psched_tdiff_t incr2;
954 /* Time integrator. We calculate EOS time
955 by adding expected packet transmission time.
956 If real time is greater, we warp artificial clock,
957 so that:
958
959 cbq_time = max(real_time, work);
960 */
961 incr2 = L2T(&q->link, q->tx_len);
Patrick McHardy7c59e252007-03-23 11:27:45 -0700962 q->now += incr2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 cbq_update(q);
964 if ((incr -= incr2) < 0)
965 incr = 0;
966 }
Patrick McHardy7c59e252007-03-23 11:27:45 -0700967 q->now += incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 q->now_rt = now;
969
970 for (;;) {
971 q->wd_expires = 0;
972
973 skb = cbq_dequeue_1(sch);
974 if (skb) {
975 sch->q.qlen--;
976 sch->flags &= ~TCQ_F_THROTTLED;
977 return skb;
978 }
979
980 /* All the classes are overlimit.
981
982 It is possible, if:
983
984 1. Scheduler is empty.
985 2. Toplevel cutoff inhibited borrowing.
986 3. Root class is overlimit.
987
988 Reset 2d and 3d conditions and retry.
989
990 Note, that NS and cbq-2.0 are buggy, peeking
991 an arbitrary class is appropriate for ancestor-only
992 sharing, but not for toplevel algorithm.
993
994 Our version is better, but slower, because it requires
995 two passes, but it is unavoidable with top-level sharing.
996 */
997
998 if (q->toplevel == TC_CBQ_MAXLEVEL &&
Patrick McHardya0849802007-03-23 11:28:30 -0700999 q->link.undertime == PSCHED_PASTPERFECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 break;
1001
1002 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardya0849802007-03-23 11:28:30 -07001003 q->link.undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 }
1005
1006 /* No packets in scheduler or nobody wants to give them to us :-(
1007 Sigh... start watchdog timer in the last case. */
1008
1009 if (sch->q.qlen) {
1010 sch->qstats.overlimits++;
Patrick McHardy88a99352007-03-16 01:21:11 -07001011 if (q->wd_expires)
1012 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001013 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 }
1015 return NULL;
1016}
1017
1018/* CBQ class maintanance routines */
1019
1020static void cbq_adjust_levels(struct cbq_class *this)
1021{
1022 if (this == NULL)
1023 return;
1024
1025 do {
1026 int level = 0;
1027 struct cbq_class *cl;
1028
1029 if ((cl = this->children) != NULL) {
1030 do {
1031 if (cl->level > level)
1032 level = cl->level;
1033 } while ((cl = cl->sibling) != this->children);
1034 }
1035 this->level = level+1;
1036 } while ((this = this->tparent) != NULL);
1037}
1038
1039static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1040{
1041 struct cbq_class *cl;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001042 struct hlist_node *n;
1043 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044
1045 if (q->quanta[prio] == 0)
1046 return;
1047
Patrick McHardyd77fea22008-07-05 23:22:05 -07001048 for (h = 0; h < q->clhash.hashsize; h++) {
1049 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 /* BUGGGG... Beware! This expression suffer of
1051 arithmetic overflows!
1052 */
1053 if (cl->priority == prio) {
1054 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1055 q->quanta[prio];
1056 }
David S. Miller5ce2d482008-07-08 17:06:30 -07001057 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001058 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
David S. Miller5ce2d482008-07-08 17:06:30 -07001059 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 }
1061 }
1062 }
1063}
1064
1065static void cbq_sync_defmap(struct cbq_class *cl)
1066{
1067 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1068 struct cbq_class *split = cl->split;
1069 unsigned h;
1070 int i;
1071
1072 if (split == NULL)
1073 return;
1074
1075 for (i=0; i<=TC_PRIO_MAX; i++) {
1076 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1077 split->defaults[i] = NULL;
1078 }
1079
1080 for (i=0; i<=TC_PRIO_MAX; i++) {
1081 int level = split->level;
1082
1083 if (split->defaults[i])
1084 continue;
1085
Patrick McHardyd77fea22008-07-05 23:22:05 -07001086 for (h = 0; h < q->clhash.hashsize; h++) {
1087 struct hlist_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 struct cbq_class *c;
1089
Patrick McHardyd77fea22008-07-05 23:22:05 -07001090 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1091 common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 if (c->split == split && c->level < level &&
1093 c->defmap&(1<<i)) {
1094 split->defaults[i] = c;
1095 level = c->level;
1096 }
1097 }
1098 }
1099 }
1100}
1101
1102static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1103{
1104 struct cbq_class *split = NULL;
1105
1106 if (splitid == 0) {
1107 if ((split = cl->split) == NULL)
1108 return;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001109 splitid = split->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 }
1111
Patrick McHardyd77fea22008-07-05 23:22:05 -07001112 if (split == NULL || split->common.classid != splitid) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 for (split = cl->tparent; split; split = split->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001114 if (split->common.classid == splitid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 break;
1116 }
1117
1118 if (split == NULL)
1119 return;
1120
1121 if (cl->split != split) {
1122 cl->defmap = 0;
1123 cbq_sync_defmap(cl);
1124 cl->split = split;
1125 cl->defmap = def&mask;
1126 } else
1127 cl->defmap = (cl->defmap&~mask)|(def&mask);
1128
1129 cbq_sync_defmap(cl);
1130}
1131
1132static void cbq_unlink_class(struct cbq_class *this)
1133{
1134 struct cbq_class *cl, **clp;
1135 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1136
Patrick McHardyd77fea22008-07-05 23:22:05 -07001137 qdisc_class_hash_remove(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138
1139 if (this->tparent) {
1140 clp=&this->sibling;
1141 cl = *clp;
1142 do {
1143 if (cl == this) {
1144 *clp = cl->sibling;
1145 break;
1146 }
1147 clp = &cl->sibling;
1148 } while ((cl = *clp) != this->sibling);
1149
1150 if (this->tparent->children == this) {
1151 this->tparent->children = this->sibling;
1152 if (this->sibling == this)
1153 this->tparent->children = NULL;
1154 }
1155 } else {
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001156 WARN_ON(this->sibling != this);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 }
1158}
1159
1160static void cbq_link_class(struct cbq_class *this)
1161{
1162 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 struct cbq_class *parent = this->tparent;
1164
1165 this->sibling = this;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001166 qdisc_class_hash_insert(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
1168 if (parent == NULL)
1169 return;
1170
1171 if (parent->children == NULL) {
1172 parent->children = this;
1173 } else {
1174 this->sibling = parent->children->sibling;
1175 parent->children->sibling = this;
1176 }
1177}
1178
1179static unsigned int cbq_drop(struct Qdisc* sch)
1180{
1181 struct cbq_sched_data *q = qdisc_priv(sch);
1182 struct cbq_class *cl, *cl_head;
1183 int prio;
1184 unsigned int len;
1185
1186 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1187 if ((cl_head = q->active[prio]) == NULL)
1188 continue;
1189
1190 cl = cl_head;
1191 do {
1192 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1193 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001194 if (!cl->q->q.qlen)
1195 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 return len;
1197 }
1198 } while ((cl = cl->next_alive) != cl_head);
1199 }
1200 return 0;
1201}
1202
1203static void
1204cbq_reset(struct Qdisc* sch)
1205{
1206 struct cbq_sched_data *q = qdisc_priv(sch);
1207 struct cbq_class *cl;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001208 struct hlist_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 int prio;
1210 unsigned h;
1211
1212 q->activemask = 0;
1213 q->pmask = 0;
1214 q->tx_class = NULL;
1215 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001216 qdisc_watchdog_cancel(&q->watchdog);
David S. Miller2fbd3da2009-09-01 17:59:25 -07001217 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001219 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 q->now_rt = q->now;
1221
1222 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1223 q->active[prio] = NULL;
1224
Patrick McHardyd77fea22008-07-05 23:22:05 -07001225 for (h = 0; h < q->clhash.hashsize; h++) {
1226 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 qdisc_reset(cl->q);
1228
1229 cl->next_alive = NULL;
Patrick McHardya0849802007-03-23 11:28:30 -07001230 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 cl->avgidle = cl->maxidle;
1232 cl->deficit = cl->quantum;
1233 cl->cpriority = cl->priority;
1234 }
1235 }
1236 sch->q.qlen = 0;
1237}
1238
1239
1240static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1241{
1242 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1243 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1244 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1245 }
1246 if (lss->change&TCF_CBQ_LSS_EWMA)
1247 cl->ewma_log = lss->ewma_log;
1248 if (lss->change&TCF_CBQ_LSS_AVPKT)
1249 cl->avpkt = lss->avpkt;
1250 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1251 cl->minidle = -(long)lss->minidle;
1252 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1253 cl->maxidle = lss->maxidle;
1254 cl->avgidle = lss->maxidle;
1255 }
1256 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1257 cl->offtime = lss->offtime;
1258 return 0;
1259}
1260
1261static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1262{
1263 q->nclasses[cl->priority]--;
1264 q->quanta[cl->priority] -= cl->weight;
1265 cbq_normalize_quanta(q, cl->priority);
1266}
1267
1268static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1269{
1270 q->nclasses[cl->priority]++;
1271 q->quanta[cl->priority] += cl->weight;
1272 cbq_normalize_quanta(q, cl->priority);
1273}
1274
1275static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1276{
1277 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1278
1279 if (wrr->allot)
1280 cl->allot = wrr->allot;
1281 if (wrr->weight)
1282 cl->weight = wrr->weight;
1283 if (wrr->priority) {
1284 cl->priority = wrr->priority-1;
1285 cl->cpriority = cl->priority;
1286 if (cl->priority >= cl->priority2)
1287 cl->priority2 = TC_CBQ_MAXPRIO-1;
1288 }
1289
1290 cbq_addprio(q, cl);
1291 return 0;
1292}
1293
1294static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1295{
1296 switch (ovl->strategy) {
1297 case TC_CBQ_OVL_CLASSIC:
1298 cl->overlimit = cbq_ovl_classic;
1299 break;
1300 case TC_CBQ_OVL_DELAY:
1301 cl->overlimit = cbq_ovl_delay;
1302 break;
1303 case TC_CBQ_OVL_LOWPRIO:
1304 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1305 ovl->priority2-1 <= cl->priority)
1306 return -EINVAL;
1307 cl->priority2 = ovl->priority2-1;
1308 cl->overlimit = cbq_ovl_lowprio;
1309 break;
1310 case TC_CBQ_OVL_DROP:
1311 cl->overlimit = cbq_ovl_drop;
1312 break;
1313 case TC_CBQ_OVL_RCLASSIC:
1314 cl->overlimit = cbq_ovl_rclassic;
1315 break;
1316 default:
1317 return -EINVAL;
1318 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001319 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 return 0;
1321}
1322
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001323#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1325{
1326 cl->police = p->police;
1327
1328 if (cl->q->handle) {
1329 if (p->police == TC_POLICE_RECLASSIFY)
1330 cl->q->reshape_fail = cbq_reshape_fail;
1331 else
1332 cl->q->reshape_fail = NULL;
1333 }
1334 return 0;
1335}
1336#endif
1337
1338static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1339{
1340 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1341 return 0;
1342}
1343
Patrick McHardy27a34212008-01-23 20:35:39 -08001344static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1345 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1346 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1347 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1348 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1349 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1350 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1351 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1352};
1353
Patrick McHardy1e904742008-01-22 22:11:17 -08001354static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355{
1356 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -08001357 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 struct tc_ratespec *r;
Patrick McHardycee63722008-01-23 20:33:32 -08001359 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360
Patrick McHardy27a34212008-01-23 20:35:39 -08001361 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001362 if (err < 0)
1363 return err;
1364
Patrick McHardy27a34212008-01-23 20:35:39 -08001365 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 return -EINVAL;
1367
Patrick McHardy1e904742008-01-22 22:11:17 -08001368 r = nla_data(tb[TCA_CBQ_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369
Patrick McHardy1e904742008-01-22 22:11:17 -08001370 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 return -EINVAL;
1372
Patrick McHardyd77fea22008-07-05 23:22:05 -07001373 err = qdisc_class_hash_init(&q->clhash);
1374 if (err < 0)
1375 goto put_rtab;
1376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 q->link.refcnt = 1;
1378 q->link.sibling = &q->link;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001379 q->link.common.classid = sch->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 q->link.qdisc = sch;
David S. Miller5ce2d482008-07-08 17:06:30 -07001381 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
David S. Millerbb949fb2008-07-08 16:55:56 -07001382 &pfifo_qdisc_ops,
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001383 sch->handle)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 q->link.q = &noop_qdisc;
1385
1386 q->link.priority = TC_CBQ_MAXPRIO-1;
1387 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1388 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1389 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1390 q->link.overlimit = cbq_ovl_classic;
David S. Miller5ce2d482008-07-08 17:06:30 -07001391 q->link.allot = psched_mtu(qdisc_dev(sch));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 q->link.quantum = q->link.allot;
1393 q->link.weight = q->link.R_tab->rate.rate;
1394
1395 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1396 q->link.avpkt = q->link.allot/2;
1397 q->link.minidle = -0x7FFFFFFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
Patrick McHardy88a99352007-03-16 01:21:11 -07001399 qdisc_watchdog_init(&q->watchdog, sch);
David S. Miller2fbd3da2009-09-01 17:59:25 -07001400 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401 q->delay_timer.function = cbq_undelay;
1402 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001403 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 q->now_rt = q->now;
1405
1406 cbq_link_class(&q->link);
1407
Patrick McHardy1e904742008-01-22 22:11:17 -08001408 if (tb[TCA_CBQ_LSSOPT])
1409 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
1411 cbq_addprio(q, &q->link);
1412 return 0;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001413
1414put_rtab:
1415 qdisc_put_rtab(q->link.R_tab);
1416 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417}
1418
1419static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1420{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001421 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
Patrick McHardy1e904742008-01-22 22:11:17 -08001423 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 return skb->len;
1425
Patrick McHardy1e904742008-01-22 22:11:17 -08001426nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001427 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 return -1;
1429}
1430
1431static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1432{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001433 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434 struct tc_cbq_lssopt opt;
1435
1436 opt.flags = 0;
1437 if (cl->borrow == NULL)
1438 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1439 if (cl->share == NULL)
1440 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1441 opt.ewma_log = cl->ewma_log;
1442 opt.level = cl->level;
1443 opt.avpkt = cl->avpkt;
1444 opt.maxidle = cl->maxidle;
1445 opt.minidle = (u32)(-cl->minidle);
1446 opt.offtime = cl->offtime;
1447 opt.change = ~0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001448 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 return skb->len;
1450
Patrick McHardy1e904742008-01-22 22:11:17 -08001451nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001452 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 return -1;
1454}
1455
1456static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1457{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001458 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 struct tc_cbq_wrropt opt;
1460
1461 opt.flags = 0;
1462 opt.allot = cl->allot;
1463 opt.priority = cl->priority+1;
1464 opt.cpriority = cl->cpriority+1;
1465 opt.weight = cl->weight;
Patrick McHardy1e904742008-01-22 22:11:17 -08001466 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467 return skb->len;
1468
Patrick McHardy1e904742008-01-22 22:11:17 -08001469nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001470 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return -1;
1472}
1473
1474static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1475{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001476 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 struct tc_cbq_ovl opt;
1478
1479 opt.strategy = cl->ovl_strategy;
1480 opt.priority2 = cl->priority2+1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001481 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001482 opt.penalty = cl->penalty;
Patrick McHardy1e904742008-01-22 22:11:17 -08001483 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 return skb->len;
1485
Patrick McHardy1e904742008-01-22 22:11:17 -08001486nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001487 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488 return -1;
1489}
1490
1491static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1492{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001493 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 struct tc_cbq_fopt opt;
1495
1496 if (cl->split || cl->defmap) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001497 opt.split = cl->split ? cl->split->common.classid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 opt.defmap = cl->defmap;
1499 opt.defchange = ~0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001500 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 }
1502 return skb->len;
1503
Patrick McHardy1e904742008-01-22 22:11:17 -08001504nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001505 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 return -1;
1507}
1508
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001509#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1511{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001512 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 struct tc_cbq_police opt;
1514
1515 if (cl->police) {
1516 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001517 opt.__res1 = 0;
1518 opt.__res2 = 0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001519 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 }
1521 return skb->len;
1522
Patrick McHardy1e904742008-01-22 22:11:17 -08001523nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001524 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 return -1;
1526}
1527#endif
1528
1529static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1530{
1531 if (cbq_dump_lss(skb, cl) < 0 ||
1532 cbq_dump_rate(skb, cl) < 0 ||
1533 cbq_dump_wrr(skb, cl) < 0 ||
1534 cbq_dump_ovl(skb, cl) < 0 ||
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001535#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 cbq_dump_police(skb, cl) < 0 ||
1537#endif
1538 cbq_dump_fopt(skb, cl) < 0)
1539 return -1;
1540 return 0;
1541}
1542
1543static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1544{
1545 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001546 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001548 nest = nla_nest_start(skb, TCA_OPTIONS);
1549 if (nest == NULL)
1550 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 if (cbq_dump_attr(skb, &q->link) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001552 goto nla_put_failure;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001553 nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 return skb->len;
1555
Patrick McHardy1e904742008-01-22 22:11:17 -08001556nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001557 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 return -1;
1559}
1560
1561static int
1562cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1563{
1564 struct cbq_sched_data *q = qdisc_priv(sch);
1565
1566 q->link.xstats.avgidle = q->link.avgidle;
1567 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1568}
1569
1570static int
1571cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1572 struct sk_buff *skb, struct tcmsg *tcm)
1573{
1574 struct cbq_class *cl = (struct cbq_class*)arg;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001575 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
1577 if (cl->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001578 tcm->tcm_parent = cl->tparent->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 else
1580 tcm->tcm_parent = TC_H_ROOT;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001581 tcm->tcm_handle = cl->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 tcm->tcm_info = cl->q->handle;
1583
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001584 nest = nla_nest_start(skb, TCA_OPTIONS);
1585 if (nest == NULL)
1586 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 if (cbq_dump_attr(skb, cl) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001588 goto nla_put_failure;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001589 nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 return skb->len;
1591
Patrick McHardy1e904742008-01-22 22:11:17 -08001592nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001593 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 return -1;
1595}
1596
1597static int
1598cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1599 struct gnet_dump *d)
1600{
1601 struct cbq_sched_data *q = qdisc_priv(sch);
1602 struct cbq_class *cl = (struct cbq_class*)arg;
1603
1604 cl->qstats.qlen = cl->q->q.qlen;
1605 cl->xstats.avgidle = cl->avgidle;
1606 cl->xstats.undertime = 0;
1607
Patrick McHardya0849802007-03-23 11:28:30 -07001608 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001609 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
1611 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1614 return -1;
1615
1616 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1617}
1618
1619static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1620 struct Qdisc **old)
1621{
1622 struct cbq_class *cl = (struct cbq_class*)arg;
1623
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001624 if (new == NULL) {
1625 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1626 &pfifo_qdisc_ops, cl->common.classid);
1627 if (new == NULL)
1628 return -ENOBUFS;
1629 } else {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001630#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001631 if (cl->police == TC_POLICE_RECLASSIFY)
1632 new->reshape_fail = cbq_reshape_fail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 }
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001635 sch_tree_lock(sch);
1636 *old = cl->q;
1637 cl->q = new;
1638 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1639 qdisc_reset(*old);
1640 sch_tree_unlock(sch);
1641
1642 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643}
1644
1645static struct Qdisc *
1646cbq_leaf(struct Qdisc *sch, unsigned long arg)
1647{
1648 struct cbq_class *cl = (struct cbq_class*)arg;
1649
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001650 return cl->q;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651}
1652
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001653static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1654{
1655 struct cbq_class *cl = (struct cbq_class *)arg;
1656
1657 if (cl->q->q.qlen == 0)
1658 cbq_deactivate_class(cl);
1659}
1660
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1662{
1663 struct cbq_sched_data *q = qdisc_priv(sch);
1664 struct cbq_class *cl = cbq_class_lookup(q, classid);
1665
1666 if (cl) {
1667 cl->refcnt++;
1668 return (unsigned long)cl;
1669 }
1670 return 0;
1671}
1672
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1674{
1675 struct cbq_sched_data *q = qdisc_priv(sch);
1676
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001677 WARN_ON(cl->filters);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678
Patrick McHardyff31ab52008-07-01 19:52:38 -07001679 tcf_destroy_chain(&cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 qdisc_destroy(cl->q);
1681 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 if (cl != &q->link)
1684 kfree(cl);
1685}
1686
1687static void
1688cbq_destroy(struct Qdisc* sch)
1689{
1690 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardyd77fea22008-07-05 23:22:05 -07001691 struct hlist_node *n, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 struct cbq_class *cl;
1693 unsigned h;
1694
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001695#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 q->rx_class = NULL;
1697#endif
1698 /*
1699 * Filters must be destroyed first because we don't destroy the
1700 * classes from root to leafs which means that filters can still
1701 * be bound to classes which have been destroyed already. --TGR '04
1702 */
Patrick McHardyd77fea22008-07-05 23:22:05 -07001703 for (h = 0; h < q->clhash.hashsize; h++) {
1704 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
Patrick McHardyff31ab52008-07-01 19:52:38 -07001705 tcf_destroy_chain(&cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001706 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001707 for (h = 0; h < q->clhash.hashsize; h++) {
1708 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1709 common.hnode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 cbq_destroy_class(sch, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001712 qdisc_class_hash_destroy(&q->clhash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713}
1714
1715static void cbq_put(struct Qdisc *sch, unsigned long arg)
1716{
1717 struct cbq_class *cl = (struct cbq_class*)arg;
1718
1719 if (--cl->refcnt == 0) {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001720#ifdef CONFIG_NET_CLS_ACT
Jarek Poplawski102396a2008-08-29 14:21:52 -07001721 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722 struct cbq_sched_data *q = qdisc_priv(sch);
1723
David S. Miller7698b4f2008-07-16 01:42:40 -07001724 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 if (q->rx_class == cl)
1726 q->rx_class = NULL;
David S. Miller7698b4f2008-07-16 01:42:40 -07001727 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728#endif
1729
1730 cbq_destroy_class(sch, cl);
1731 }
1732}
1733
1734static int
Patrick McHardy1e904742008-01-22 22:11:17 -08001735cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 unsigned long *arg)
1737{
1738 int err;
1739 struct cbq_sched_data *q = qdisc_priv(sch);
1740 struct cbq_class *cl = (struct cbq_class*)*arg;
Patrick McHardy1e904742008-01-22 22:11:17 -08001741 struct nlattr *opt = tca[TCA_OPTIONS];
1742 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 struct cbq_class *parent;
1744 struct qdisc_rate_table *rtab = NULL;
1745
Patrick McHardycee63722008-01-23 20:33:32 -08001746 if (opt == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 return -EINVAL;
1748
Patrick McHardy27a34212008-01-23 20:35:39 -08001749 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001750 if (err < 0)
1751 return err;
1752
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 if (cl) {
1754 /* Check parent */
1755 if (parentid) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001756 if (cl->tparent &&
1757 cl->tparent->common.classid != parentid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 return -EINVAL;
1759 if (!cl->tparent && parentid != TC_H_ROOT)
1760 return -EINVAL;
1761 }
1762
Patrick McHardy1e904742008-01-22 22:11:17 -08001763 if (tb[TCA_CBQ_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001764 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1765 tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766 if (rtab == NULL)
1767 return -EINVAL;
1768 }
1769
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001770 if (tca[TCA_RATE]) {
1771 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1772 qdisc_root_sleeping_lock(sch),
1773 tca[TCA_RATE]);
1774 if (err) {
1775 if (rtab)
1776 qdisc_put_rtab(rtab);
1777 return err;
1778 }
1779 }
1780
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 /* Change class parameters */
1782 sch_tree_lock(sch);
1783
1784 if (cl->next_alive != NULL)
1785 cbq_deactivate_class(cl);
1786
1787 if (rtab) {
Patrick McHardyb94c8af2008-11-20 04:11:36 -08001788 qdisc_put_rtab(cl->R_tab);
1789 cl->R_tab = rtab;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 }
1791
Patrick McHardy1e904742008-01-22 22:11:17 -08001792 if (tb[TCA_CBQ_LSSOPT])
1793 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794
Patrick McHardy1e904742008-01-22 22:11:17 -08001795 if (tb[TCA_CBQ_WRROPT]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 cbq_rmprio(q, cl);
Patrick McHardy1e904742008-01-22 22:11:17 -08001797 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 }
1799
Patrick McHardy1e904742008-01-22 22:11:17 -08001800 if (tb[TCA_CBQ_OVL_STRATEGY])
1801 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001803#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001804 if (tb[TCA_CBQ_POLICE])
1805 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806#endif
1807
Patrick McHardy1e904742008-01-22 22:11:17 -08001808 if (tb[TCA_CBQ_FOPT])
1809 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810
1811 if (cl->q->q.qlen)
1812 cbq_activate_class(cl);
1813
1814 sch_tree_unlock(sch);
1815
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 return 0;
1817 }
1818
1819 if (parentid == TC_H_ROOT)
1820 return -EINVAL;
1821
Patrick McHardy1e904742008-01-22 22:11:17 -08001822 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1823 tb[TCA_CBQ_LSSOPT] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 return -EINVAL;
1825
Patrick McHardy1e904742008-01-22 22:11:17 -08001826 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 if (rtab == NULL)
1828 return -EINVAL;
1829
1830 if (classid) {
1831 err = -EINVAL;
1832 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1833 goto failure;
1834 } else {
1835 int i;
1836 classid = TC_H_MAKE(sch->handle,0x8000);
1837
1838 for (i=0; i<0x8000; i++) {
1839 if (++q->hgenerator >= 0x8000)
1840 q->hgenerator = 1;
1841 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1842 break;
1843 }
1844 err = -ENOSR;
1845 if (i >= 0x8000)
1846 goto failure;
1847 classid = classid|q->hgenerator;
1848 }
1849
1850 parent = &q->link;
1851 if (parentid) {
1852 parent = cbq_class_lookup(q, parentid);
1853 err = -EINVAL;
1854 if (parent == NULL)
1855 goto failure;
1856 }
1857
1858 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001859 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 if (cl == NULL)
1861 goto failure;
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001862
1863 if (tca[TCA_RATE]) {
1864 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1865 qdisc_root_sleeping_lock(sch),
1866 tca[TCA_RATE]);
1867 if (err) {
1868 kfree(cl);
1869 goto failure;
1870 }
1871 }
1872
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 cl->R_tab = rtab;
1874 rtab = NULL;
1875 cl->refcnt = 1;
David S. Miller5ce2d482008-07-08 17:06:30 -07001876 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
David S. Millerbb949fb2008-07-08 16:55:56 -07001877 &pfifo_qdisc_ops, classid)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 cl->q = &noop_qdisc;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001879 cl->common.classid = classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880 cl->tparent = parent;
1881 cl->qdisc = sch;
1882 cl->allot = parent->allot;
1883 cl->quantum = cl->allot;
1884 cl->weight = cl->R_tab->rate.rate;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
1886 sch_tree_lock(sch);
1887 cbq_link_class(cl);
1888 cl->borrow = cl->tparent;
1889 if (cl->tparent != &q->link)
1890 cl->share = cl->tparent;
1891 cbq_adjust_levels(parent);
1892 cl->minidle = -0x7FFFFFFF;
Patrick McHardy1e904742008-01-22 22:11:17 -08001893 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1894 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895 if (cl->ewma_log==0)
1896 cl->ewma_log = q->link.ewma_log;
1897 if (cl->maxidle==0)
1898 cl->maxidle = q->link.maxidle;
1899 if (cl->avpkt==0)
1900 cl->avpkt = q->link.avpkt;
1901 cl->overlimit = cbq_ovl_classic;
Patrick McHardy1e904742008-01-22 22:11:17 -08001902 if (tb[TCA_CBQ_OVL_STRATEGY])
1903 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001904#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001905 if (tb[TCA_CBQ_POLICE])
1906 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907#endif
Patrick McHardy1e904742008-01-22 22:11:17 -08001908 if (tb[TCA_CBQ_FOPT])
1909 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 sch_tree_unlock(sch);
1911
Patrick McHardyd77fea22008-07-05 23:22:05 -07001912 qdisc_class_hash_grow(sch, &q->clhash);
1913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 *arg = (unsigned long)cl;
1915 return 0;
1916
1917failure:
1918 qdisc_put_rtab(rtab);
1919 return err;
1920}
1921
1922static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1923{
1924 struct cbq_sched_data *q = qdisc_priv(sch);
1925 struct cbq_class *cl = (struct cbq_class*)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001926 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927
1928 if (cl->filters || cl->children || cl == &q->link)
1929 return -EBUSY;
1930
1931 sch_tree_lock(sch);
1932
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001933 qlen = cl->q->q.qlen;
1934 qdisc_reset(cl->q);
1935 qdisc_tree_decrease_qlen(cl->q, qlen);
1936
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 if (cl->next_alive)
1938 cbq_deactivate_class(cl);
1939
1940 if (q->tx_borrowed == cl)
1941 q->tx_borrowed = q->tx_class;
1942 if (q->tx_class == cl) {
1943 q->tx_class = NULL;
1944 q->tx_borrowed = NULL;
1945 }
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001946#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 if (q->rx_class == cl)
1948 q->rx_class = NULL;
1949#endif
1950
1951 cbq_unlink_class(cl);
1952 cbq_adjust_levels(cl->tparent);
1953 cl->defmap = 0;
1954 cbq_sync_defmap(cl);
1955
1956 cbq_rmprio(q, cl);
1957 sch_tree_unlock(sch);
1958
Jarek Poplawski7cd0a632009-03-15 20:00:19 -07001959 BUG_ON(--cl->refcnt == 0);
1960 /*
1961 * This shouldn't happen: we "hold" one cops->get() when called
1962 * from tc_ctl_tclass; the destroy method is done from cops->put().
1963 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964
1965 return 0;
1966}
1967
1968static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1969{
1970 struct cbq_sched_data *q = qdisc_priv(sch);
1971 struct cbq_class *cl = (struct cbq_class *)arg;
1972
1973 if (cl == NULL)
1974 cl = &q->link;
1975
1976 return &cl->filter_list;
1977}
1978
1979static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1980 u32 classid)
1981{
1982 struct cbq_sched_data *q = qdisc_priv(sch);
1983 struct cbq_class *p = (struct cbq_class*)parent;
1984 struct cbq_class *cl = cbq_class_lookup(q, classid);
1985
1986 if (cl) {
1987 if (p && p->level <= cl->level)
1988 return 0;
1989 cl->filters++;
1990 return (unsigned long)cl;
1991 }
1992 return 0;
1993}
1994
1995static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1996{
1997 struct cbq_class *cl = (struct cbq_class*)arg;
1998
1999 cl->filters--;
2000}
2001
2002static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2003{
2004 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardyd77fea22008-07-05 23:22:05 -07002005 struct cbq_class *cl;
2006 struct hlist_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 unsigned h;
2008
2009 if (arg->stop)
2010 return;
2011
Patrick McHardyd77fea22008-07-05 23:22:05 -07002012 for (h = 0; h < q->clhash.hashsize; h++) {
2013 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 if (arg->count < arg->skip) {
2015 arg->count++;
2016 continue;
2017 }
2018 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2019 arg->stop = 1;
2020 return;
2021 }
2022 arg->count++;
2023 }
2024 }
2025}
2026
Eric Dumazet20fea082007-11-14 01:44:41 -08002027static const struct Qdisc_class_ops cbq_class_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 .graft = cbq_graft,
2029 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002030 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002031 .get = cbq_get,
2032 .put = cbq_put,
2033 .change = cbq_change_class,
2034 .delete = cbq_delete,
2035 .walk = cbq_walk,
2036 .tcf_chain = cbq_find_tcf,
2037 .bind_tcf = cbq_bind_filter,
2038 .unbind_tcf = cbq_unbind_filter,
2039 .dump = cbq_dump_class,
2040 .dump_stats = cbq_dump_class_stats,
2041};
2042
Eric Dumazet20fea082007-11-14 01:44:41 -08002043static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 .next = NULL,
2045 .cl_ops = &cbq_class_ops,
2046 .id = "cbq",
2047 .priv_size = sizeof(struct cbq_sched_data),
2048 .enqueue = cbq_enqueue,
2049 .dequeue = cbq_dequeue,
Jarek Poplawski77be1552008-10-31 00:47:01 -07002050 .peek = qdisc_peek_dequeued,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 .drop = cbq_drop,
2052 .init = cbq_init,
2053 .reset = cbq_reset,
2054 .destroy = cbq_destroy,
2055 .change = NULL,
2056 .dump = cbq_dump,
2057 .dump_stats = cbq_dump_stats,
2058 .owner = THIS_MODULE,
2059};
2060
2061static int __init cbq_module_init(void)
2062{
2063 return register_qdisc(&cbq_qdisc_ops);
2064}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002065static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066{
2067 unregister_qdisc(&cbq_qdisc_ops);
2068}
2069module_init(cbq_module_init)
2070module_exit(cbq_module_exit)
2071MODULE_LICENSE("GPL");