/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	he should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if class
	per-round allotment is less than MTU. Namely, if per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me, why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
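
/*	An added worked example of the bound above (illustrative numbers,
	not from the original text): take k = 2 classes, MTU = 1500 bytes,
	B = 1.25e6 bytes/sec (10 Mbit/s), r_i = 0.25, r = 0.5, and W chosen
	so that the per-round allotment W*r_i = 500 bytes (i.e. W = 2000):

	delay_i <= ([1500/500]*2000*0.5 + 2000*0.5 + 2*1500)/1.25e6
	         = (3000 + 1000 + 3000)/1.25e6 ~= 5.6 ms

	so the delay stays bounded even though the per-round allotment
	(500) is below the MTU, which is the point made against [3].  */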

struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est64 rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
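
/* An added note: L2T ("length to time") converts a packet length in
 * bytes into transmission time at the class's configured rate, in
 * psched ticks, via the precomputed rate-table lookup in qdisc_l2t().
 */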

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link-sharing rules (e.g. route based) at the root
 * of CBQ, so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 * Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL)
				goto fallback;
		}
		if (cl->level >= head->level)
			goto fallback;
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 * apply an agency-specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}
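
/* An added illustration (hypothetical setup, not from the original text):
 * with the CBQ root acting as a split node, a filter result carrying a
 * full classid (major part set) is resolved via cbq_class_lookup(),
 * while a result with no major part is treated as a logical priority,
 * i.e. an index into the split node's defaults[] table built from the
 * classes' defmaps, falling back to defaults[TC_PRIO_BESTEFFORT] and
 * finally, in step 4, to the skb->priority-based defaults of the last
 * split node reached.
 */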

/*
 * A packet has just been enqueued to an empty class.
 * cbq_activate_class adds it to the tail of the active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
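
/* An added note on the invariant above: q->active[prio] always points at
 * the tail of a circular singly linked list, so its next_alive is the
 * head that the round-robin walk in cbq_dequeue_prio() starts from; a
 * freshly activated class becomes the new tail.
 */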

/*
 * Unlink class from the active chain.
 * Note that this same procedure is done directly in cbq_dequeue*
 * during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * The class goes to sleep, so that it will have no
		 * chance to work avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a bug in this place:
		 * apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	 * real available rate, rather than leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}
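
/* An added worked example of the "forgiveness" above (illustrative
 * numbers): with avgidle = -400 ticks and ewma_log = 3 (W = 1/8), after
 * cl->offtime has been added the sleep is shortened by
 * (-avgidle) - ((-avgidle) >> 3) = 400 - 50 = 350 ticks, i.e. by
 * (1 - W)*(-avgidle).
 */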

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 * they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff, if the transmitting class
 * has backlog and it is not regulated. It is not part of the
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;
	psched_time_t now;

	q->tx_class = NULL;
	/* Time integrator. We calculate EOS time
	 * by adding the expected packet transmission time.
	 */
	now = q->now + L2T(&q->link, len);

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is total time between packet right edges.
		 * (last_pktlen/rate) is "virtual" busy time, so that
		 *
		 * idle = (now - last) - last_pktlen/rate
		 */

		idle = now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
			 * cl->avgidle == true_avgidle/W,
			 * hence:
			 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			 * will be allowed to send.
			 * It will occur when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to the undertime virtual clock the time
			 * necessary to complete the transmitted packet.
			 * (len/phys_bandwidth has already passed
			 * by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		if ((s64)(now - cl->last) > 0)
			cl->last = now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
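
/* An added worked example of the update above (illustrative numbers):
 * with ewma_log = 3, so W = 2^-3 = 1/8, a scaled avgidle of 800 ticks
 * and a measured idle of 0 ticks gives
 *
 *	avgidle' = 800 + 0 - (800 >> 3) = 700
 *
 * i.e. the scaled average decays by W toward the new sample. This is the
 * same fixed-point EWMA true_avgidle := (1-W)*true_avgidle + W*idle
 * described in the comment, just kept premultiplied by 1/W to avoid
 * per-packet divisions.
 */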

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Now the overlimit
		 * action is generated for unbounded classes
		 * only if the link is completely congested.
		 * Though it agrees with the ancestor-only paradigm,
		 * it looks very stupid. Particularly,
		 * it means that this chunk of code will either
		 * never be called or result in strong amplification
		 * of burstiness. Dangerous, silly, and, however,
		 * no other solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment for
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It could occur even if cl->q->q.qlen != 0,
			 * e.g. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}
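
/* An added note: the loop above is a deficit-style WRR round. Each
 * backlogged class spends cl->deficit, refilled by cl->quantum once per
 * round, and may transmit on borrowed bandwidth when cbq_under_limit()
 * returns an underlimit ancestor instead of the class itself.
 */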

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;

	now = psched_get_time();

	if (q->tx_class)
		cbq_update(q);

	q->now = now;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * It is possible, if:
		 *
		 * 1. Scheduler is empty.
		 * 2. Toplevel cutoff inhibited borrowing.
		 * 3. Root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy, peeking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes, but it is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	 * Sigh... start the watchdog timer in the last case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		cl = this->children;
		if (cl) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			 * arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 ||
			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
					cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
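
/* An added worked example (illustrative numbers): with two classes in a
 * band, both with allot = 1500, one of weight 1 and one of weight 3,
 * q->nclasses[prio] = 2 and q->quanta[prio] = 1 + 3 = 4, so
 *
 *	quantum_1 = (1*1500*2)/4 =  750
 *	quantum_2 = (3*1500*2)/4 = 2250
 *
 * i.e. per-round service stays proportional to weight while the total
 * served per round stays around nclasses*allot.
 */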

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct cbq_class *c;

			hlist_for_each_entry(c, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		split = cl->split;
		if (!split)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		cl_head = q->active[prio];
		if (!cl_head)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}

static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};
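
/* An added usage sketch (not part of the original file): these netlink
 * attributes are what iproute2's "tc" fills in for commands like the
 * following (illustrative; exact option spelling depends on the tc
 * version):
 *
 *	tc qdisc add dev eth0 root handle 1: cbq \
 *		bandwidth 100Mbit avpkt 1000
 *	tc class add dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 100Mbit rate 10Mbit allot 1500 \
 *		prio 5 avpkt 1000 bounded
 *
 * "rate" arrives as TCA_CBQ_RATE (plus TCA_CBQ_RTAB), the WRR knobs as
 * TCA_CBQ_WRROPT, and "bounded"/"isolated" via TCA_CBQ_LSSOPT.
 */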

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO - 1;
	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}

static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	memset(&opt, 0, sizeof(opt));
	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001506 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 return -1;
1508}
1509
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001510#ifdef CONFIG_NET_CLS_ACT
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001511static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001513 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 struct tc_cbq_police opt;
1515
1516 if (cl->police) {
1517 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001518 opt.__res1 = 0;
1519 opt.__res2 = 0;
David S. Miller1b34ec42012-03-29 05:11:39 -04001520 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1521 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 }
1523 return skb->len;
1524
Patrick McHardy1e904742008-01-22 22:11:17 -08001525nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001526 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 return -1;
1528}
1529#endif
1530
1531static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1532{
1533 if (cbq_dump_lss(skb, cl) < 0 ||
1534 cbq_dump_rate(skb, cl) < 0 ||
1535 cbq_dump_wrr(skb, cl) < 0 ||
1536 cbq_dump_ovl(skb, cl) < 0 ||
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001537#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538 cbq_dump_police(skb, cl) < 0 ||
1539#endif
1540 cbq_dump_fopt(skb, cl) < 0)
1541 return -1;
1542 return 0;
1543}
1544
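/*
 * Qdisc-level dump: the root "link" class carries the scheduler-wide
 * parameters, so this just nests the usual class attribute set under
 * TCA_OPTIONS.
 */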
1545static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1546{
1547 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001548 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001550 nest = nla_nest_start(skb, TCA_OPTIONS);
1551 if (nest == NULL)
1552 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 if (cbq_dump_attr(skb, &q->link) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001554 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001555 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556
Patrick McHardy1e904742008-01-22 22:11:17 -08001557nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001558 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 return -1;
1560}
1561
1562static int
1563cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1564{
1565 struct cbq_sched_data *q = qdisc_priv(sch);
1566
1567 q->link.xstats.avgidle = q->link.avgidle;
1568 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1569}
1570
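/*
 * Per-class dump: fill in the tcmsg header (parent, own classid, leaf
 * qdisc handle), then nest the same attribute set under TCA_OPTIONS.
 */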
1571static int
1572cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1573 struct sk_buff *skb, struct tcmsg *tcm)
1574{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001575 struct cbq_class *cl = (struct cbq_class *)arg;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001576 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577
1578 if (cl->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001579 tcm->tcm_parent = cl->tparent->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 else
1581 tcm->tcm_parent = TC_H_ROOT;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001582 tcm->tcm_handle = cl->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 tcm->tcm_info = cl->q->handle;
1584
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001585 nest = nla_nest_start(skb, TCA_OPTIONS);
1586 if (nest == NULL)
1587 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 if (cbq_dump_attr(skb, cl) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001589 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001590 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591
Patrick McHardy1e904742008-01-22 22:11:17 -08001592nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001593 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 return -1;
1595}
1596
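/*
 * Per-class statistics: besides the generic byte/packet/queue counters,
 * CBQ exports avgidle and undertime as xstats, which is how userspace
 * (e.g. "tc -s class show") can observe the link-sharing state.
 */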
1597static int
1598cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1599 struct gnet_dump *d)
1600{
1601 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001602 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603
1604 cl->qstats.qlen = cl->q->q.qlen;
1605 cl->xstats.avgidle = cl->avgidle;
1606 cl->xstats.undertime = 0;
1607
Patrick McHardya0849802007-03-23 11:28:30 -07001608 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001609 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
1611 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
Eric Dumazetd250a5f2009-10-02 10:32:18 +00001612 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1614 return -1;
1615
1616 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1617}
1618
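/*
 * Replace the leaf qdisc of a class.  A NULL replacement means "attach
 * a fresh default pfifo"; the old qdisc is reset, its queue length is
 * propagated up the tree, and it is handed back through *old.
 */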
1619static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1620 struct Qdisc **old)
1621{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001622 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001624 if (new == NULL) {
Changli Gao3511c912010-10-16 13:04:08 +00001625 new = qdisc_create_dflt(sch->dev_queue,
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001626 &pfifo_qdisc_ops, cl->common.classid);
1627 if (new == NULL)
1628 return -ENOBUFS;
1629 } else {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001630#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001631 if (cl->police == TC_POLICE_RECLASSIFY)
1632 new->reshape_fail = cbq_reshape_fail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 }
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001635 sch_tree_lock(sch);
1636 *old = cl->q;
1637 cl->q = new;
1638 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1639 qdisc_reset(*old);
1640 sch_tree_unlock(sch);
1641
1642 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643}
1644
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001645static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001647 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001649 return cl->q;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650}
1651
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001652static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1653{
1654 struct cbq_class *cl = (struct cbq_class *)arg;
1655
1656 if (cl->q->q.qlen == 0)
1657 cbq_deactivate_class(cl);
1658}
1659
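/*
 * cbq_get()/cbq_put() bracket every external reference taken on a class
 * through the class ops; the class is only actually destroyed when the
 * last reference is dropped in cbq_put().
 */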
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1661{
1662 struct cbq_sched_data *q = qdisc_priv(sch);
1663 struct cbq_class *cl = cbq_class_lookup(q, classid);
1664
1665 if (cl) {
1666 cl->refcnt++;
1667 return (unsigned long)cl;
1668 }
1669 return 0;
1670}
1671
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1673{
1674 struct cbq_sched_data *q = qdisc_priv(sch);
1675
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001676 WARN_ON(cl->filters);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677
Patrick McHardyff31ab52008-07-01 19:52:38 -07001678 tcf_destroy_chain(&cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 qdisc_destroy(cl->q);
1680 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 if (cl != &q->link)
1683 kfree(cl);
1684}
1685
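/*
 * Full teardown of the qdisc: two passes over the class hash, the first
 * dropping the filter chains, the second destroying the classes (see
 * the comment below on why this order matters).
 */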
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001686static void cbq_destroy(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687{
1688 struct cbq_sched_data *q = qdisc_priv(sch);
Sasha Levinb67bfe02013-02-27 17:06:00 -08001689 struct hlist_node *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001691 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001693#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 q->rx_class = NULL;
1695#endif
1696 /*
1697 * Filters must be destroyed first because we don't destroy the
1698 * classes from root to leaves, which means that filters can still
1699 * be bound to classes which have been destroyed already. --TGR '04
1700 */
Patrick McHardyd77fea22008-07-05 23:22:05 -07001701 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001702 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
Patrick McHardyff31ab52008-07-01 19:52:38 -07001703 tcf_destroy_chain(&cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001704 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001705 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001706 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
Patrick McHardyd77fea22008-07-05 23:22:05 -07001707 common.hnode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 cbq_destroy_class(sch, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001710 qdisc_class_hash_destroy(&q->clhash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711}
1712
1713static void cbq_put(struct Qdisc *sch, unsigned long arg)
1714{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001715 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716
1717 if (--cl->refcnt == 0) {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001718#ifdef CONFIG_NET_CLS_ACT
Jarek Poplawski102396a2008-08-29 14:21:52 -07001719 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 struct cbq_sched_data *q = qdisc_priv(sch);
1721
David S. Miller7698b4f2008-07-16 01:42:40 -07001722 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 if (q->rx_class == cl)
1724 q->rx_class = NULL;
David S. Miller7698b4f2008-07-16 01:42:40 -07001725 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726#endif
1727
1728 cbq_destroy_class(sch, cl);
1729 }
1730}
1731
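/*
 * Create a new class or change an existing one.  The first branch
 * (cl != NULL) updates an existing class under the tree lock; the rest
 * allocates a fresh class, picks or validates its classid, links it
 * under its parent and applies the same option blocks (LSS, WRR,
 * overlimit strategy, police, fopt).
 */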
1732static int
Patrick McHardy1e904742008-01-22 22:11:17 -08001733cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 unsigned long *arg)
1735{
1736 int err;
1737 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001738 struct cbq_class *cl = (struct cbq_class *)*arg;
Patrick McHardy1e904742008-01-22 22:11:17 -08001739 struct nlattr *opt = tca[TCA_OPTIONS];
1740 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 struct cbq_class *parent;
1742 struct qdisc_rate_table *rtab = NULL;
1743
Patrick McHardycee63722008-01-23 20:33:32 -08001744 if (opt == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 return -EINVAL;
1746
Patrick McHardy27a34212008-01-23 20:35:39 -08001747 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001748 if (err < 0)
1749 return err;
1750
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 if (cl) {
1752 /* Check parent */
1753 if (parentid) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001754 if (cl->tparent &&
1755 cl->tparent->common.classid != parentid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 return -EINVAL;
1757 if (!cl->tparent && parentid != TC_H_ROOT)
1758 return -EINVAL;
1759 }
1760
Patrick McHardy1e904742008-01-22 22:11:17 -08001761 if (tb[TCA_CBQ_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001762 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1763 tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 if (rtab == NULL)
1765 return -EINVAL;
1766 }
1767
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001768 if (tca[TCA_RATE]) {
1769 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1770 qdisc_root_sleeping_lock(sch),
1771 tca[TCA_RATE]);
1772 if (err) {
Yang Yingliang79c11f22013-12-17 15:29:17 +08001773 qdisc_put_rtab(rtab);
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001774 return err;
1775 }
1776 }
1777
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 /* Change class parameters */
1779 sch_tree_lock(sch);
1780
1781 if (cl->next_alive != NULL)
1782 cbq_deactivate_class(cl);
1783
1784 if (rtab) {
Patrick McHardyb94c8af2008-11-20 04:11:36 -08001785 qdisc_put_rtab(cl->R_tab);
1786 cl->R_tab = rtab;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 }
1788
Patrick McHardy1e904742008-01-22 22:11:17 -08001789 if (tb[TCA_CBQ_LSSOPT])
1790 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791
Patrick McHardy1e904742008-01-22 22:11:17 -08001792 if (tb[TCA_CBQ_WRROPT]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 cbq_rmprio(q, cl);
Patrick McHardy1e904742008-01-22 22:11:17 -08001794 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 }
1796
Patrick McHardy1e904742008-01-22 22:11:17 -08001797 if (tb[TCA_CBQ_OVL_STRATEGY])
1798 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001800#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001801 if (tb[TCA_CBQ_POLICE])
1802 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803#endif
1804
Patrick McHardy1e904742008-01-22 22:11:17 -08001805 if (tb[TCA_CBQ_FOPT])
1806 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807
1808 if (cl->q->q.qlen)
1809 cbq_activate_class(cl);
1810
1811 sch_tree_unlock(sch);
1812
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 return 0;
1814 }
1815
1816 if (parentid == TC_H_ROOT)
1817 return -EINVAL;
1818
Patrick McHardy1e904742008-01-22 22:11:17 -08001819 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1820 tb[TCA_CBQ_LSSOPT] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 return -EINVAL;
1822
Patrick McHardy1e904742008-01-22 22:11:17 -08001823 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 if (rtab == NULL)
1825 return -EINVAL;
1826
1827 if (classid) {
1828 err = -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001829 if (TC_H_MAJ(classid ^ sch->handle) ||
1830 cbq_class_lookup(q, classid))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 goto failure;
1832 } else {
1833 int i;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001834 classid = TC_H_MAKE(sch->handle, 0x8000);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835
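		/* No classid given: scan minor ids 0x8001..0xFFFF via the
		 * round-robin hgenerator until a free one is found; give up
		 * with -ENOSR after a full cycle.
		 */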
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001836 for (i = 0; i < 0x8000; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837 if (++q->hgenerator >= 0x8000)
1838 q->hgenerator = 1;
1839 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1840 break;
1841 }
1842 err = -ENOSR;
1843 if (i >= 0x8000)
1844 goto failure;
1845 classid = classid|q->hgenerator;
1846 }
1847
1848 parent = &q->link;
1849 if (parentid) {
1850 parent = cbq_class_lookup(q, parentid);
1851 err = -EINVAL;
1852 if (parent == NULL)
1853 goto failure;
1854 }
1855
1856 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001857 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 if (cl == NULL)
1859 goto failure;
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001860
1861 if (tca[TCA_RATE]) {
1862 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1863 qdisc_root_sleeping_lock(sch),
1864 tca[TCA_RATE]);
1865 if (err) {
1866 kfree(cl);
1867 goto failure;
1868 }
1869 }
1870
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 cl->R_tab = rtab;
1872 rtab = NULL;
1873 cl->refcnt = 1;
Changli Gao3511c912010-10-16 13:04:08 +00001874 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1875 if (!cl->q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 cl->q = &noop_qdisc;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001877 cl->common.classid = classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 cl->tparent = parent;
1879 cl->qdisc = sch;
1880 cl->allot = parent->allot;
1881 cl->quantum = cl->allot;
1882 cl->weight = cl->R_tab->rate.rate;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883
1884 sch_tree_lock(sch);
1885 cbq_link_class(cl);
1886 cl->borrow = cl->tparent;
1887 if (cl->tparent != &q->link)
1888 cl->share = cl->tparent;
1889 cbq_adjust_levels(parent);
1890 cl->minidle = -0x7FFFFFFF;
Patrick McHardy1e904742008-01-22 22:11:17 -08001891 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1892 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001893 if (cl->ewma_log == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 cl->ewma_log = q->link.ewma_log;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001895 if (cl->maxidle == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 cl->maxidle = q->link.maxidle;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001897 if (cl->avpkt == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 cl->avpkt = q->link.avpkt;
1899 cl->overlimit = cbq_ovl_classic;
Patrick McHardy1e904742008-01-22 22:11:17 -08001900 if (tb[TCA_CBQ_OVL_STRATEGY])
1901 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001902#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001903 if (tb[TCA_CBQ_POLICE])
1904 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905#endif
Patrick McHardy1e904742008-01-22 22:11:17 -08001906 if (tb[TCA_CBQ_FOPT])
1907 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 sch_tree_unlock(sch);
1909
Patrick McHardyd77fea22008-07-05 23:22:05 -07001910 qdisc_class_hash_grow(sch, &q->clhash);
1911
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 *arg = (unsigned long)cl;
1913 return 0;
1914
1915failure:
1916 qdisc_put_rtab(rtab);
1917 return err;
1918}
1919
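/*
 * Delete a class: refused while filters or child classes still
 * reference it, and always refused for the built-in root (link) class.
 * Queued packets are dropped and the qlen change is propagated upward.
 */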
1920static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1921{
1922 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001923 struct cbq_class *cl = (struct cbq_class *)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001924 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
1926 if (cl->filters || cl->children || cl == &q->link)
1927 return -EBUSY;
1928
1929 sch_tree_lock(sch);
1930
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001931 qlen = cl->q->q.qlen;
1932 qdisc_reset(cl->q);
1933 qdisc_tree_decrease_qlen(cl->q, qlen);
1934
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 if (cl->next_alive)
1936 cbq_deactivate_class(cl);
1937
1938 if (q->tx_borrowed == cl)
1939 q->tx_borrowed = q->tx_class;
1940 if (q->tx_class == cl) {
1941 q->tx_class = NULL;
1942 q->tx_borrowed = NULL;
1943 }
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001944#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 if (q->rx_class == cl)
1946 q->rx_class = NULL;
1947#endif
1948
1949 cbq_unlink_class(cl);
1950 cbq_adjust_levels(cl->tparent);
1951 cl->defmap = 0;
1952 cbq_sync_defmap(cl);
1953
1954 cbq_rmprio(q, cl);
1955 sch_tree_unlock(sch);
1956
Jarek Poplawski7cd0a632009-03-15 20:00:19 -07001957 BUG_ON(--cl->refcnt == 0);
1958 /*
1959 * This shouldn't happen: we "hold" one cops->get() when called
1960 * from tc_ctl_tclass; the destroy method is done from cops->put().
1961 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962
1963 return 0;
1964}
1965
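/*
 * Classifier plumbing: tcf_chain returns the per-class filter list
 * (the root link class when arg is 0); bind/unbind keep cl->filters
 * up to date so that a class with live filters cannot be deleted.
 */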
1966static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1967{
1968 struct cbq_sched_data *q = qdisc_priv(sch);
1969 struct cbq_class *cl = (struct cbq_class *)arg;
1970
1971 if (cl == NULL)
1972 cl = &q->link;
1973
1974 return &cl->filter_list;
1975}
1976
1977static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1978 u32 classid)
1979{
1980 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001981 struct cbq_class *p = (struct cbq_class *)parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 struct cbq_class *cl = cbq_class_lookup(q, classid);
1983
1984 if (cl) {
1985 if (p && p->level <= cl->level)
1986 return 0;
1987 cl->filters++;
1988 return (unsigned long)cl;
1989 }
1990 return 0;
1991}
1992
1993static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1994{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001995 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996
1997 cl->filters--;
1998}
1999
2000static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2001{
2002 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardyd77fea22008-07-05 23:22:05 -07002003 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002004 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005
2006 if (arg->stop)
2007 return;
2008
Patrick McHardyd77fea22008-07-05 23:22:05 -07002009 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08002010 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 if (arg->count < arg->skip) {
2012 arg->count++;
2013 continue;
2014 }
2015 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2016 arg->stop = 1;
2017 return;
2018 }
2019 arg->count++;
2020 }
2021 }
2022}
2023
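/*
 * Glue into the qdisc core: the two ops tables below expose the class
 * operations and register the scheduler under the "cbq" id.
 */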
Eric Dumazet20fea082007-11-14 01:44:41 -08002024static const struct Qdisc_class_ops cbq_class_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025 .graft = cbq_graft,
2026 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002027 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 .get = cbq_get,
2029 .put = cbq_put,
2030 .change = cbq_change_class,
2031 .delete = cbq_delete,
2032 .walk = cbq_walk,
2033 .tcf_chain = cbq_find_tcf,
2034 .bind_tcf = cbq_bind_filter,
2035 .unbind_tcf = cbq_unbind_filter,
2036 .dump = cbq_dump_class,
2037 .dump_stats = cbq_dump_class_stats,
2038};
2039
Eric Dumazet20fea082007-11-14 01:44:41 -08002040static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 .next = NULL,
2042 .cl_ops = &cbq_class_ops,
2043 .id = "cbq",
2044 .priv_size = sizeof(struct cbq_sched_data),
2045 .enqueue = cbq_enqueue,
2046 .dequeue = cbq_dequeue,
Jarek Poplawski77be1552008-10-31 00:47:01 -07002047 .peek = qdisc_peek_dequeued,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 .drop = cbq_drop,
2049 .init = cbq_init,
2050 .reset = cbq_reset,
2051 .destroy = cbq_destroy,
2052 .change = NULL,
2053 .dump = cbq_dump,
2054 .dump_stats = cbq_dump_stats,
2055 .owner = THIS_MODULE,
2056};
2057
2058static int __init cbq_module_init(void)
2059{
2060 return register_qdisc(&cbq_qdisc_ops);
2061}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002062static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063{
2064 unregister_qdisc(&cbq_qdisc_ops);
2065}
2066module_init(cbq_module_init)
2067module_exit(cbq_module_exit)
2068MODULE_LICENSE("GPL");