/*
 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*      Class-Based Queueing (CBQ) algorithm.
        =======================================

        Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
                 Management Models for Packet Networks",
                 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

                 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

                 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
                 Parameters", 1996

                 [4] Sally Floyd and Michael Speer, "Experimental Results
                 for Class-Based Queueing", 1998, not published.

        -----------------------------------------------------------------------

        The algorithm skeleton was taken from the NS simulator's cbq.cc.
        Anyone who wants to check this code against the LBL version
        should take into account that ONLY the skeleton was borrowed;
        the implementation is different. In particular:

        --- The WRR algorithm is different. Our version looks more
        reasonable (I hope) and works when quanta are allowed to be
        less than MTU, which is always the case when real-time classes
        have small rates. Note that the statement of [3] is
        incomplete: delay may actually be estimated even if the class's
        per-round allotment is less than MTU. Namely, if the per-round
        allotment is W*r_i, and r_1+...+r_k = r < 1, then

        delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

        In the worst case we have an IntServ estimate with D = W*r+k*MTU
        and C = MTU*r. The proof (if correct at all) is trivial.


        --- It seems that cbq-2.0 is not very accurate. At least, I cannot
        interpret some places, which look like wrong translations
        from NS. Anyone is advised to find these differences
        and explain to me why I am wrong 8).

        --- Linux has no EOI event, so we cannot estimate true class
        idle time. The workaround is to consider the next dequeue event
        as a sign that the previous packet has finished. This is wrong
        because of internal device queueing, but on a permanently loaded
        link it is true. Moreover, combined with the clock integrator,
        this scheme looks very close to an ideal solution. */
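
/* A worked instance of the delay bound above, with illustrative numbers
 * that are assumptions, not taken from the original text: let
 * MTU = 1500 bytes, link bandwidth B = 125000 bytes/sec (1 Mbit/s),
 * k = 2 real-time classes with r_1 = r_2 = 0.05 (so r = 0.1), and
 * W = 10000 bytes, giving a per-round allotment W*r_i = 500 bytes < MTU.
 * Then
 *
 *      [MTU/(W*r_i)] = ceil(1500/500) = 3,     W*r = 1000 bytes,
 *
 *      delay_i <= (3*1000 + 1000 + 2*1500)/125000 = 7000/125000 = 56 ms,
 *
 * i.e. the bound stays finite even though the per-round allotment is
 * smaller than the MTU, which is the point argued against [3] above.
 */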

struct cbq_sched_data;


struct cbq_class {
        struct Qdisc_class_common common;
        struct cbq_class *next_alive;   /* next class with backlog in this priority band */

/* Parameters */
        unsigned char priority;         /* class priority */
        unsigned char priority2;        /* priority to be used after overlimit */
        unsigned char ewma_log;         /* time constant for idle time calculation */
        unsigned char ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
        unsigned char police;
#endif

        u32 defmap;

        /* Link-sharing scheduler parameters */
        long maxidle;                   /* Class parameters: see below. */
        long offtime;
        long minidle;
        u32 avpkt;
        struct qdisc_rate_table *R_tab;

        /* Overlimit strategy parameters */
        void (*overlimit)(struct cbq_class *cl);
        psched_tdiff_t penalty;

        /* General scheduler (WRR) parameters */
        long allot;
        long quantum;                   /* Allotment per WRR round */
        long weight;                    /* Relative allotment: see below */

        struct Qdisc *qdisc;            /* Ptr to CBQ discipline */
        struct cbq_class *split;        /* Ptr to split node */
        struct cbq_class *share;        /* Ptr to LS parent in the class tree */
        struct cbq_class *tparent;      /* Ptr to tree parent in the class tree */
        struct cbq_class *borrow;       /* NULL if class is bandwidth limited;
                                           parent otherwise */
        struct cbq_class *sibling;      /* Sibling chain */
        struct cbq_class *children;     /* Pointer to children chain */

        struct Qdisc *q;                /* Elementary queueing discipline */


/* Variables */
        unsigned char cpriority;        /* Effective priority */
        unsigned char delayed;
        unsigned char level;            /* level of the class in hierarchy:
                                           0 for leaf classes, and maximal
                                           level of children + 1 for nodes.
                                         */

        psched_time_t last;             /* Last end of service */
        psched_time_t undertime;
        long avgidle;
        long deficit;                   /* Saved deficit for WRR */
        psched_time_t penalized;
        struct gnet_stats_basic_packed bstats;
        struct gnet_stats_queue qstats;
        struct gnet_stats_rate_est64 rate_est;
        struct tc_cbq_xstats xstats;

        struct tcf_proto __rcu *filter_list;

        int refcnt;
        int filters;

        struct cbq_class *defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
        struct Qdisc_class_hash clhash;         /* Hash table of all classes */
        int nclasses[TC_CBQ_MAXPRIO + 1];
        unsigned int quanta[TC_CBQ_MAXPRIO + 1];

        struct cbq_class link;

        unsigned int activemask;
        struct cbq_class *active[TC_CBQ_MAXPRIO + 1];   /* List of all classes
                                                           with backlog */

#ifdef CONFIG_NET_CLS_ACT
        struct cbq_class *rx_class;
#endif
        struct cbq_class *tx_class;
        struct cbq_class *tx_borrowed;
        int tx_len;
        psched_time_t now;              /* Cached timestamp */
        unsigned int pmask;

        struct hrtimer delay_timer;
        struct qdisc_watchdog watchdog; /* Watchdog timer,
                                           started when CBQ has
                                           backlog, but cannot
                                           transmit just now */
        psched_tdiff_t wd_expires;
        int toplevel;
        u32 hgenerator;
};


#define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)

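/* A short note on L2T ("length to time"), added for clarity: qdisc_l2t()
 * looks up the precomputed rate table and returns the time, in psched
 * ticks, needed to transmit len bytes at the class rate. As an
 * illustrative example (numbers assumed, not from the original): at a
 * class rate of 125000 bytes/sec, L2T(cl, 1500) corresponds to 12 ms
 * worth of ticks; these values feed the idle/avgidle calculations below.
 */
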
static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
        struct Qdisc_class_common *clc;

        clc = qdisc_class_find(&q->clhash, classid);
        if (clc == NULL)
                return NULL;
        return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
        struct cbq_class *cl;

        for (cl = this->tparent; cl; cl = cl->tparent) {
                struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

                if (new != NULL && new != this)
                        return new;
        }
        return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link-sharing rules (e.g. route-based) at the root
 * of CBQ, so that they resolve to split nodes. Then packets are
 * classified by logical priority, or a more specific classifier may be
 * attached to the split node.
 */

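/* An illustrative walk through the steps below, with example values that
 * are assumptions, not from the original: suppose the qdisc handle is
 * 1:0 and a packet arrives with skb->priority == 1:10. Step 1 matches,
 * so class 1:10 is used directly. If instead skb->priority were 0, the
 * loop would run the filters attached to the root, follow any returned
 * link-sharing class, re-run the filters attached to that split node,
 * and fall back to defaults[TC_PRIO_BESTEFFORT] when no filter gives a
 * usable verdict.
 */
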
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *head = &q->link;
        struct cbq_class **defmap;
        struct cbq_class *cl = NULL;
        u32 prio = skb->priority;
        struct tcf_proto *fl;
        struct tcf_result res;

        /*
         * Step 1. If skb->priority points to one of our classes, use it.
         */
        if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
            (cl = cbq_class_lookup(q, prio)) != NULL)
                return cl;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        for (;;) {
                int result = 0;
                defmap = head->defaults;

                fl = rcu_dereference_bh(head->filter_list);
                /*
                 * Step 2+n. Apply classifier.
                 */
                result = tc_classify_compat(skb, fl, &res);
                if (!fl || result < 0)
                        goto fallback;

                cl = (void *)res.class;
                if (!cl) {
                        if (TC_H_MAJ(res.classid))
                                cl = cbq_class_lookup(q, res.classid);
                        else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
                                cl = defmap[TC_PRIO_BESTEFFORT];

                        if (cl == NULL)
                                goto fallback;
                }
                if (cl->level >= head->level)
                        goto fallback;
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
                case TC_ACT_STOLEN:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                        /* fall through */
                case TC_ACT_SHOT:
                        return NULL;
                case TC_ACT_RECLASSIFY:
                        return cbq_reclassify(skb, cl);
                }
#endif
                if (cl->level == 0)
                        return cl;

                /*
                 * Step 3+n. If the classifier selected a link-sharing
                 * class, apply the agency-specific classifier.
                 * Repeat this procedure until we hit a leaf node.
                 */
                head = cl;
        }

fallback:
        cl = head;

        /*
         * Step 4. No success...
         */
        if (TC_H_MAJ(prio) == 0 &&
            !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
                return head;

        return cl;
}

/*
 * A packet has just been enqueued on an empty class.
 * cbq_activate_class adds it to the tail of the active class list
 * of its priority band.
 */

static inline void cbq_activate_class(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        int prio = cl->cpriority;
        struct cbq_class *cl_tail;

        cl_tail = q->active[prio];
        q->active[prio] = cl;

        if (cl_tail != NULL) {
                cl->next_alive = cl_tail->next_alive;
                cl_tail->next_alive = cl;
        } else {
                cl->next_alive = cl;
                q->activemask |= (1<<prio);
        }
}

/*
 * Unlink class from the active chain.
 * Note that the same unlinking is also done directly in cbq_dequeue*
 * during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        int prio = this->cpriority;
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];

        do {
                cl = cl_prev->next_alive;
                if (cl == this) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        q->activemask &= ~(1<<prio);
                                        return;
                                }
                        }
                        return;
                }
        } while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
        int toplevel = q->toplevel;

        if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
                psched_time_t now = psched_get_time();

                do {
                        if (cl->undertime < now) {
                                q->toplevel = cl->level;
                                return;
                        }
                } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
        }
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        int uninitialized_var(ret);
        struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
        q->rx_class = cl;
#endif
        if (cl == NULL) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
                kfree_skb(skb);
                return ret;
        }

#ifdef CONFIG_NET_CLS_ACT
        cl->q->__parent = sch;
#endif
        ret = qdisc_enqueue(skb, cl->q);
        if (ret == NET_XMIT_SUCCESS) {
                sch->q.qlen++;
                cbq_mark_toplevel(q, cl);
                if (!cl->next_alive)
                        cbq_activate_class(cl);
                return ret;
        }

        if (net_xmit_drop_count(ret)) {
                qdisc_qstats_drop(sch);
                cbq_mark_toplevel(q, cl);
                cl->qstats.drops++;
        }
        return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (!cl->delayed) {
                delay += cl->offtime;

                /*
                 * The class goes to sleep, so that it will have no
                 * chance to work off avgidle. Let's forgive it 8)
                 *
                 * BTW cbq-2.0 has a bug in this place; apparently
                 * they forgot to shift it by cl->ewma_log.
                 */
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                if (delay <= 0)
                        delay = 1;
                cl->undertime = q->now + delay;

                cl->xstats.overactions++;
                cl->delayed = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;

        /* Dirty work! We must schedule wakeups based on
         * the real available rate, rather than the leaf rate,
         * which may be tiny (even zero).
         */
        if (q->toplevel == TC_CBQ_MAXLEVEL) {
                struct cbq_class *b;
                psched_tdiff_t base_delay = q->wd_expires;

                for (b = cl->borrow; b; b = b->borrow) {
                        delay = b->undertime - q->now;
                        if (delay < base_delay) {
                                if (delay <= 0)
                                        delay = 1;
                                base_delay = delay;
                        }
                }

                q->wd_expires = base_delay;
        }
}

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
 * when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this = cl;

        do {
                if (cl->level > q->toplevel) {
                        cl = NULL;
                        break;
                }
        } while ((cl = cl->borrow) != NULL);

        if (cl == NULL)
                cl = this;
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        psched_tdiff_t delay = cl->undertime - q->now;

        if (test_bit(__QDISC_STATE_DEACTIVATED,
                     &qdisc_root_sleeping(cl->qdisc)->state))
                return;

        if (!cl->delayed) {
                psched_time_t sched = q->now;
                ktime_t expires;

                delay += cl->offtime;
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
                if (cl->avgidle < cl->minidle)
                        cl->avgidle = cl->minidle;
                cl->undertime = q->now + delay;

                if (delay > 0) {
                        sched += delay + cl->penalty;
                        cl->penalized = sched;
                        cl->cpriority = TC_CBQ_MAXPRIO;
                        q->pmask |= (1<<TC_CBQ_MAXPRIO);

                        expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
                        if (hrtimer_try_to_cancel(&q->delay_timer) &&
                            ktime_to_ns(ktime_sub(
                                        hrtimer_get_expires(&q->delay_timer),
                                        expires)) > 0)
                                hrtimer_set_expires(&q->delay_timer, expires);
                        hrtimer_restart(&q->delay_timer);
                        cl->delayed = 1;
                        cl->xstats.overactions++;
                        return;
                }
                delay = 1;
        }
        if (q->wd_expires == 0 || q->wd_expires > delay)
                q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        cl->penalized = q->now + cl->penalty;

        if (cl->cpriority != cl->priority2) {
                cl->cpriority = cl->priority2;
                q->pmask |= (1<<cl->cpriority);
                cl->xstats.overactions++;
        }
        cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
        if (cl->q->ops->drop)
                if (cl->q->ops->drop(cl->q))
                        cl->qdisc->q.qlen--;
        cl->xstats.overactions++;
        cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
                                       psched_time_t now)
{
        struct cbq_class *cl;
        struct cbq_class *cl_prev = q->active[prio];
        psched_time_t sched = now;

        if (cl_prev == NULL)
                return 0;

        do {
                cl = cl_prev->next_alive;
                if (now - cl->penalized > 0) {
                        cl_prev->next_alive = cl->next_alive;
                        cl->next_alive = NULL;
                        cl->cpriority = cl->priority;
                        cl->delayed = 0;
                        cbq_activate_class(cl);

                        if (cl == q->active[prio]) {
                                q->active[prio] = cl_prev;
                                if (cl == q->active[prio]) {
                                        q->active[prio] = NULL;
                                        return 0;
                                }
                        }

                        cl = cl_prev->next_alive;
                } else if (sched - cl->penalized > 0)
                        sched = cl->penalized;
        } while ((cl_prev = cl) != q->active[prio]);

        return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
        struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
                                                delay_timer);
        struct Qdisc *sch = q->watchdog.qdisc;
        psched_time_t now;
        psched_tdiff_t delay = 0;
        unsigned int pmask;

        now = psched_get_time();

        pmask = q->pmask;
        q->pmask = 0;

        while (pmask) {
                int prio = ffz(~pmask);
                psched_tdiff_t tmp;

                pmask &= ~(1<<prio);

                tmp = cbq_undelay_prio(q, prio, now);
                if (tmp > 0) {
                        q->pmask |= 1<<prio;
                        if (tmp < delay || delay == 0)
                                delay = tmp;
                }
        }

        if (delay) {
                ktime_t time;

                time = ktime_set(0, 0);
                time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
        }

        qdisc_unthrottled(sch);
        __netif_schedule(qdisc_root(sch));
        return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
        struct Qdisc *sch = child->__parent;
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = q->rx_class;

        q->rx_class = NULL;

        if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
                int ret;

                cbq_mark_toplevel(q, cl);

                q->rx_class = cl;
                cl->q->__parent = sch;

                ret = qdisc_enqueue(skb, cl->q);
                if (ret == NET_XMIT_SUCCESS) {
                        sch->q.qlen++;
                        if (!cl->next_alive)
                                cbq_activate_class(cl);
                        return 0;
                }
                if (net_xmit_drop_count(ret))
                        qdisc_qstats_drop(sch);
                return 0;
        }

        qdisc_qstats_drop(sch);
        return -1;
}
#endif

/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated. This is not part of the
 * original CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                    struct cbq_class *borrowed)
{
        if (cl && q->toplevel >= borrowed->level) {
                if (cl->q->q.qlen > 1) {
                        do {
                                if (borrowed->undertime == PSCHED_PASTPERFECT) {
                                        q->toplevel = borrowed->level;
                                        return;
                                }
                        } while ((borrowed = borrowed->borrow) != NULL);
                }
#if 0
        /* It is not necessary now. Uncommenting it
           will save CPU cycles, but decrease fairness.
         */
                q->toplevel = TC_CBQ_MAXLEVEL;
#endif
        }
}

static void
cbq_update(struct cbq_sched_data *q)
{
        struct cbq_class *this = q->tx_class;
        struct cbq_class *cl = this;
        int len = q->tx_len;
        psched_time_t now;

        q->tx_class = NULL;
        /* Time integrator. We calculate EOS time
         * by adding the expected packet transmission time.
         */
        now = q->now + L2T(&q->link, len);

        for ( ; cl; cl = cl->share) {
                long avgidle = cl->avgidle;
                long idle;

                cl->bstats.packets++;
                cl->bstats.bytes += len;

                /*
                 * (now - last) is the total time between packet right edges.
                 * (last_pktlen/rate) is the "virtual" busy time, so that
                 *
                 * idle = (now - last) - last_pktlen/rate
                 */

                idle = now - cl->last;
                if ((unsigned long)idle > 128*1024*1024) {
                        avgidle = cl->maxidle;
                } else {
                        idle -= L2T(cl, len);

                        /* true_avgidle := (1-W)*true_avgidle + W*idle,
                         * where W=2^{-ewma_log}. But cl->avgidle is scaled:
                         * cl->avgidle == true_avgidle/W,
                         * hence:
                         */
                        avgidle += idle - (avgidle>>cl->ewma_log);
                }
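
                /* A numeric sketch of the scaled update above, with assumed
                 * values that are not from the original: let ewma_log = 3,
                 * so W = 1/8, and suppose cl->avgidle == 800 (a true average
                 * idle of 100) while the new sample is idle == 40. Then
                 * avgidle becomes 800 + 40 - 800/8 = 740, i.e. 92.5 when
                 * unscaled, which is exactly (1-W)*100 + W*40 = 92.5. The
                 * scaled form thus implements the EWMA with a shift instead
                 * of a division.
                 */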

                if (avgidle <= 0) {
                        /* Overlimit or at-limit */

                        if (avgidle < cl->minidle)
                                avgidle = cl->minidle;

                        cl->avgidle = avgidle;

                        /* Calculate the expected time when this class
                         * will be allowed to send.
                         * It will occur when:
                         * (1-W)*true_avgidle + W*delay = 0, i.e.
                         * idle = (1/W - 1)*(-true_avgidle)
                         * or
                         * idle = (1 - W)*(-cl->avgidle);
                         */
                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

                        /*
                         * That is not all.
                         * To maintain the rate allocated to the class,
                         * we add to undertime the virtual clock time
                         * necessary to complete the transmitted packet.
                         * (len/phys_bandwidth has already elapsed
                         * by the moment cbq_update runs.)
                         */

                        idle -= L2T(&q->link, len);
                        idle += L2T(cl, len);

                        cl->undertime = now + idle;
                } else {
                        /* Underlimit */

                        cl->undertime = PSCHED_PASTPERFECT;
                        if (avgidle > cl->maxidle)
                                cl->avgidle = cl->maxidle;
                        else
                                cl->avgidle = avgidle;
                }
                if ((s64)(now - cl->last) > 0)
                        cl->last = now;
        }

        cbq_update_toplevel(q, this, q->tx_borrowed);
}

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *this_cl = cl;

        if (cl->tparent == NULL)
                return cl;

        if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
                cl->delayed = 0;
                return cl;
        }

        do {
                /* This is a very suspicious place. Currently the overlimit
                 * action is generated for non-bounded classes
                 * only if the link is completely congested.
                 * Though this agrees with the ancestor-only paradigm,
                 * it looks very stupid. In particular,
                 * it means that this chunk of code will either
                 * never be called or will result in strong amplification
                 * of burstiness. Dangerous, silly, and yet
                 * no other solution exists.
                 */
                cl = cl->borrow;
                if (!cl) {
                        this_cl->qstats.overlimits++;
                        this_cl->overlimit(this_cl);
                        return NULL;
                }
                if (cl->level > q->toplevel)
                        return NULL;
        } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

        cl->delayed = 0;
        return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl_tail, *cl_prev, *cl;
        struct sk_buff *skb;
        int deficit;

        cl_tail = cl_prev = q->active[prio];
        cl = cl_prev->next_alive;

        do {
                deficit = 0;

                /* Start round */
                do {
                        struct cbq_class *borrow = cl;

                        if (cl->q->q.qlen &&
                            (borrow = cbq_under_limit(cl)) == NULL)
                                goto skip_class;

                        if (cl->deficit <= 0) {
                                /* Class exhausted its allotment for
                                 * this round. Switch to the next one.
                                 */
                                deficit = 1;
                                cl->deficit += cl->quantum;
                                goto next_class;
                        }

                        skb = cl->q->dequeue(cl->q);

                        /* Class did not give us any skb :-(
                         * It could occur even if cl->q->q.qlen != 0,
                         * e.g. if cl->q == "tbf"
                         */
                        if (skb == NULL)
                                goto skip_class;

                        cl->deficit -= qdisc_pkt_len(skb);
                        q->tx_class = cl;
                        q->tx_borrowed = borrow;
                        if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
                                borrow->xstats.borrows++;
                                cl->xstats.borrows++;
#else
                                borrow->xstats.borrows += qdisc_pkt_len(skb);
                                cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
                        }
                        q->tx_len = qdisc_pkt_len(skb);

                        if (cl->deficit <= 0) {
                                q->active[prio] = cl;
                                cl = cl->next_alive;
                                cl->deficit += cl->quantum;
                        }
                        return skb;

skip_class:
                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                                /* Class is empty or penalized.
                                 * Unlink it from the active chain.
                                 */
                                cl_prev->next_alive = cl->next_alive;
                                cl->next_alive = NULL;

                                /* Did cl_tail point to it? */
                                if (cl == cl_tail) {
                                        /* Repair it! */
                                        cl_tail = cl_prev;

                                        /* Was it the last class in this band? */
                                        if (cl == cl_tail) {
                                                /* Kill the band! */
                                                q->active[prio] = NULL;
                                                q->activemask &= ~(1<<prio);
                                                if (cl->q->q.qlen)
                                                        cbq_activate_class(cl);
                                                return NULL;
                                        }

                                        q->active[prio] = cl_tail;
                                }
                                if (cl->q->q.qlen)
                                        cbq_activate_class(cl);

                                cl = cl_prev;
                        }

next_class:
                        cl_prev = cl;
                        cl = cl->next_alive;
                } while (cl_prev != cl_tail);
        } while (deficit);

        q->active[prio] = cl_prev;

        return NULL;
}

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        unsigned int activemask;

        activemask = q->activemask & 0xFF;
        while (activemask) {
                int prio = ffz(~activemask);
                activemask &= ~(1<<prio);
                skb = cbq_dequeue_prio(sch, prio);
                if (skb)
                        return skb;
        }
        return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct cbq_sched_data *q = qdisc_priv(sch);
        psched_time_t now;

        now = psched_get_time();

        if (q->tx_class)
                cbq_update(q);

        q->now = now;

        for (;;) {
                q->wd_expires = 0;

                skb = cbq_dequeue_1(sch);
                if (skb) {
                        qdisc_bstats_update(sch, skb);
                        sch->q.qlen--;
                        qdisc_unthrottled(sch);
                        return skb;
                }

                /* All the classes are overlimit.
                 *
                 * This is possible if:
                 *
                 * 1. The scheduler is empty.
                 * 2. The toplevel cutoff inhibited borrowing.
                 * 3. The root class is overlimit.
                 *
                 * Reset conditions 2 and 3 and retry.
                 *
                 * Note that NS and cbq-2.0 are buggy: peeking
                 * an arbitrary class is appropriate for ancestor-only
                 * sharing, but not for the toplevel algorithm.
                 *
                 * Our version is better, but slower, because it requires
                 * two passes; that is unavoidable with top-level sharing.
                 */

                if (q->toplevel == TC_CBQ_MAXLEVEL &&
                    q->link.undertime == PSCHED_PASTPERFECT)
                        break;

                q->toplevel = TC_CBQ_MAXLEVEL;
                q->link.undertime = PSCHED_PASTPERFECT;
        }

        /* No packets in the scheduler, or nobody wants to give them to us :-(
         * Sigh... start the watchdog timer in the latter case.
         */

        if (sch->q.qlen) {
                qdisc_qstats_overlimit(sch);
                if (q->wd_expires)
                        qdisc_watchdog_schedule(&q->watchdog,
                                                now + q->wd_expires);
        }
        return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
        if (this == NULL)
                return;

        do {
                int level = 0;
                struct cbq_class *cl;

                cl = this->children;
                if (cl) {
                        do {
                                if (cl->level > level)
                                        level = cl->level;
                        } while ((cl = cl->sibling) != this->children);
                }
                this->level = level + 1;
        } while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
        struct cbq_class *cl;
        unsigned int h;

        if (q->quanta[prio] == 0)
                return;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
                        /* BUGGGG... Beware! This expression suffers from
                         * arithmetic overflows!
                         */
                        if (cl->priority == prio) {
                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                                        q->quanta[prio];
                        }
                        if (cl->quantum <= 0 ||
                            cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
                                pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
                                        cl->common.classid, cl->quantum);
                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
                        }
                }
        }
}

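/* A worked example of the normalization above, with assumed numbers that
 * are not from the original: suppose a band holds two classes with
 * allot = 1514 and weights 2 and 1, so q->nclasses[prio] = 2 and
 * q->quanta[prio] = 3 (the sum of the weights). The quanta then become
 *
 *      2*1514*2/3 = 2018   and   1*1514*2/3 = 1009,
 *
 * i.e. per-round allotments in the 2:1 ratio of the weights, averaging
 * roughly one allot per class per round across the band.
 */
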
static void cbq_sync_defmap(struct cbq_class *cl)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *split = cl->split;
        unsigned int h;
        int i;

        if (split == NULL)
                return;

        for (i = 0; i <= TC_PRIO_MAX; i++) {
                if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
                        split->defaults[i] = NULL;
        }

        for (i = 0; i <= TC_PRIO_MAX; i++) {
                int level = split->level;

                if (split->defaults[i])
                        continue;

                for (h = 0; h < q->clhash.hashsize; h++) {
                        struct cbq_class *c;

                        hlist_for_each_entry(c, &q->clhash.hash[h],
                                             common.hnode) {
                                if (c->split == split && c->level < level &&
                                    c->defmap & (1<<i)) {
                                        split->defaults[i] = c;
                                        level = c->level;
                                }
                        }
                }
        }
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
        struct cbq_class *split = NULL;

        if (splitid == 0) {
                split = cl->split;
                if (!split)
                        return;
                splitid = split->common.classid;
        }

        if (split == NULL || split->common.classid != splitid) {
                for (split = cl->tparent; split; split = split->tparent)
                        if (split->common.classid == splitid)
                                break;
        }

        if (split == NULL)
                return;

        if (cl->split != split) {
                cl->defmap = 0;
                cbq_sync_defmap(cl);
                cl->split = split;
                cl->defmap = def & mask;
        } else
                cl->defmap = (cl->defmap & ~mask) | (def & mask);

        cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
        struct cbq_class *cl, **clp;
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);

        qdisc_class_hash_remove(&q->clhash, &this->common);

        if (this->tparent) {
                clp = &this->sibling;
                cl = *clp;
                do {
                        if (cl == this) {
                                *clp = cl->sibling;
                                break;
                        }
                        clp = &cl->sibling;
                } while ((cl = *clp) != this->sibling);

                if (this->tparent->children == this) {
                        this->tparent->children = this->sibling;
                        if (this->sibling == this)
                                this->tparent->children = NULL;
                }
        } else {
                WARN_ON(this->sibling != this);
        }
}

static void cbq_link_class(struct cbq_class *this)
{
        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
        struct cbq_class *parent = this->tparent;

        this->sibling = this;
        qdisc_class_hash_insert(&q->clhash, &this->common);

        if (parent == NULL)
                return;

        if (parent->children == NULL) {
                parent->children = this;
        } else {
                this->sibling = parent->children->sibling;
                parent->children->sibling = this;
        }
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl, *cl_head;
        int prio;
        unsigned int len;

        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
                cl_head = q->active[prio];
                if (!cl_head)
                        continue;

                cl = cl_head;
                do {
                        if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
                                sch->q.qlen--;
                                if (!cl->q->q.qlen)
                                        cbq_deactivate_class(cl);
                                return len;
                        }
                } while ((cl = cl->next_alive) != cl_head);
        }
        return 0;
}

static void
cbq_reset(struct Qdisc *sch)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        int prio;
        unsigned int h;

        q->activemask = 0;
        q->pmask = 0;
        q->tx_class = NULL;
        q->tx_borrowed = NULL;
        qdisc_watchdog_cancel(&q->watchdog);
        hrtimer_cancel(&q->delay_timer);
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();

        for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
                q->active[prio] = NULL;

        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
                        qdisc_reset(cl->q);

                        cl->next_alive = NULL;
                        cl->undertime = PSCHED_PASTPERFECT;
                        cl->avgidle = cl->maxidle;
                        cl->deficit = cl->quantum;
                        cl->cpriority = cl->priority;
                }
        }
        sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
        if (lss->change & TCF_CBQ_LSS_FLAGS) {
                cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
                cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
        }
        if (lss->change & TCF_CBQ_LSS_EWMA)
                cl->ewma_log = lss->ewma_log;
        if (lss->change & TCF_CBQ_LSS_AVPKT)
                cl->avpkt = lss->avpkt;
        if (lss->change & TCF_CBQ_LSS_MINIDLE)
                cl->minidle = -(long)lss->minidle;
        if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
                cl->maxidle = lss->maxidle;
                cl->avgidle = lss->maxidle;
        }
        if (lss->change & TCF_CBQ_LSS_OFFTIME)
                cl->offtime = lss->offtime;
        return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]--;
        q->quanta[cl->priority] -= cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
        q->nclasses[cl->priority]++;
        q->quanta[cl->priority] += cl->weight;
        cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

        if (wrr->allot)
                cl->allot = wrr->allot;
        if (wrr->weight)
                cl->weight = wrr->weight;
        if (wrr->priority) {
                cl->priority = wrr->priority - 1;
                cl->cpriority = cl->priority;
                if (cl->priority >= cl->priority2)
                        cl->priority2 = TC_CBQ_MAXPRIO - 1;
        }

        cbq_addprio(q, cl);
        return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
        switch (ovl->strategy) {
        case TC_CBQ_OVL_CLASSIC:
                cl->overlimit = cbq_ovl_classic;
                break;
        case TC_CBQ_OVL_DELAY:
                cl->overlimit = cbq_ovl_delay;
                break;
        case TC_CBQ_OVL_LOWPRIO:
                if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
                    ovl->priority2 - 1 <= cl->priority)
                        return -EINVAL;
                cl->priority2 = ovl->priority2 - 1;
                cl->overlimit = cbq_ovl_lowprio;
                break;
        case TC_CBQ_OVL_DROP:
                cl->overlimit = cbq_ovl_drop;
                break;
        case TC_CBQ_OVL_RCLASSIC:
                cl->overlimit = cbq_ovl_rclassic;
                break;
        default:
                return -EINVAL;
        }
        cl->penalty = ovl->penalty;
        return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
        cl->police = p->police;

        if (cl->q->handle) {
                if (p->police == TC_POLICE_RECLASSIFY)
                        cl->q->reshape_fail = cbq_reshape_fail;
                else
                        cl->q->reshape_fail = NULL;
        }
        return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
        cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
        return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
        [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
        [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
        [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
        [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
        [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
        [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct tc_ratespec *r;
        int err;

        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
        if (err < 0)
                return err;

        if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
                return -EINVAL;

        r = nla_data(tb[TCA_CBQ_RATE]);

        if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
                return -EINVAL;

        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
                goto put_rtab;

        q->link.refcnt = 1;
        q->link.sibling = &q->link;
        q->link.common.classid = sch->handle;
        q->link.qdisc = sch;
        q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
                                      sch->handle);
        if (!q->link.q)
                q->link.q = &noop_qdisc;

        q->link.priority = TC_CBQ_MAXPRIO - 1;
        q->link.priority2 = TC_CBQ_MAXPRIO - 1;
        q->link.cpriority = TC_CBQ_MAXPRIO - 1;
        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
        q->link.overlimit = cbq_ovl_classic;
        q->link.allot = psched_mtu(qdisc_dev(sch));
        q->link.quantum = q->link.allot;
        q->link.weight = q->link.R_tab->rate.rate;

        q->link.ewma_log = TC_CBQ_DEF_EWMA;
        q->link.avpkt = q->link.allot/2;
        q->link.minidle = -0x7FFFFFFF;

        qdisc_watchdog_init(&q->watchdog, sch);
        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
        q->delay_timer.function = cbq_undelay;
        q->toplevel = TC_CBQ_MAXLEVEL;
        q->now = psched_get_time();

        cbq_link_class(&q->link);

        if (tb[TCA_CBQ_LSSOPT])
                cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

        cbq_addprio(q, &q->link);
        return 0;

put_rtab:
        qdisc_put_rtab(q->link.R_tab);
        return err;
}

static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
        unsigned char *b = skb_tail_pointer(skb);

        if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
                goto nla_put_failure;
        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

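/*
 * Weighted round robin parameters.  The struct is zeroed first so
 * padding bytes never leak kernel stack to user space; priorities are
 * exported 1-based while being kept 0-based inside the scheduler.
 */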
static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	memset(&opt, 0, sizeof(opt));
	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

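/* Overlimit strategy; priority2 is exported 1-based like the WRR priorities. */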
static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

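/*
 * Filter options (split node and defmap) are dumped only when set;
 * defchange is reported as all-ones, i.e. every defmap bit is
 * significant.
 */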
static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
			goto nla_put_failure;
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif

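/*
 * Serialize all per-class attributes in a fixed order; if any helper
 * fails, the caller cancels the enclosing nest.
 */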
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

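/* Dump the root (link) class configuration nested inside TCA_OPTIONS. */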
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	q->link.xstats.avgidle = q->link.avgidle;
	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
}

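/*
 * Per-class dump: fill in the tcmsg header (parent, handle, child
 * qdisc) and nest the attribute helpers under TCA_OPTIONS.
 */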
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_class *cl = (struct cbq_class *)arg;
	struct nlattr *nest;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->common.classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (cbq_dump_attr(skb, cl) < 0)
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

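/*
 * Per-class statistics; undertime is converted from an absolute
 * timestamp into an offset relative to q->now when the class is
 * currently throttled.
 */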
static int
cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
		     struct gnet_dump *d)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->qstats.qlen = cl->q->q.qlen;
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;

	if (cl->undertime != PSCHED_PASTPERFECT)
		cl->xstats.undertime = cl->undertime - q->now;

	if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

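/*
 * Replace a class's child qdisc.  A NULL replacement grafts a fresh
 * default pfifo; the old child is drained and reset under the tree
 * lock so its queued packets are accounted for.
 */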
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			return -ENOBUFS;
	} else {
#ifdef CONFIG_NET_CLS_ACT
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;
#endif
	}
	sch_tree_lock(sch);
	*old = cl->q;
	cl->q = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl->q;
}

static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->q->q.qlen == 0)
		cbq_deactivate_class(cl);
}

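/*
 * Class reference counting: cbq_get()/cbq_put() pair up around user
 * space operations.  The final cbq_put() destroys the class; with
 * NET_CLS_ACT it first clears q->rx_class under the root lock so the
 * enqueue path cannot be left holding a stale pointer.
 */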
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	WARN_ON(cl->filters);

	tcf_destroy_chain(&cl->filter_list);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	if (cl != &q->link)
		kfree(cl);
}

static void cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct hlist_node *next;
	struct cbq_class *cl;
	unsigned int h;

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = NULL;
#endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
					  common.hnode)
			cbq_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(root_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(root_lock);
#endif

		cbq_destroy_class(sch, cl);
	}
}

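/*
 * cbq_change_class() backs both class creation and modification.
 * With an existing class (*arg != 0) it updates parameters in place
 * under the tree lock; otherwise it allocates a new class, taking the
 * classid from the caller or auto-generating one in the 0x8000-0xFFFF
 * minor range.  Illustrative user-space counterpart (device name and
 * rates are made up):
 *
 *	tc class change dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 10Mbit rate 2Mbit allot 1514 avpkt 1000
 */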
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent &&
			    cl->tparent->common.classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE]) {
			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
					      tb[TCA_CBQ_RTAB]);
			if (rtab == NULL)
				return -EINVAL;
		}

		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err) {
				qdisc_put_rtab(rtab);
				return err;
			}
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			qdisc_put_rtab(cl->R_tab);
			cl->R_tab = rtab;
		}

		if (tb[TCA_CBQ_LSSOPT])
			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));

		if (tb[TCA_CBQ_WRROPT]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY])
			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));

#ifdef CONFIG_NET_CLS_ACT
		if (tb[TCA_CBQ_POLICE])
			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif

		if (tb[TCA_CBQ_FOPT])
			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
	    tb[TCA_CBQ_LSSOPT] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid ^ sch->handle) ||
		    cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle, 0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err) {
			kfree(cl);
			goto failure;
		}
	}

	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (!cl->q)
		cl->q = &noop_qdisc;
	cl->common.classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY])
		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
#ifdef CONFIG_NET_CLS_ACT
	if (tb[TCA_CBQ_POLICE])
		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
#endif
	if (tb[TCA_CBQ_FOPT])
		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

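/*
 * A class can only be deleted once nothing references it: no bound
 * filters, no children, and never the built-in root (link) class.
 */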
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned int qlen;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	qlen = cl->q->q.qlen;
	qdisc_reset(cl->q);
	qdisc_tree_decrease_qlen(cl->q, qlen);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_ACT
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	return 0;
}

static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
					     unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

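/*
 * Binding a filter to a class bumps cl->filters so the class cannot
 * be deleted while the filter points at it; the bind is refused when
 * the target class is not strictly below the class the filter hangs
 * off in the hierarchy.
 */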
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}

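/*
 * Iterate over every class in the hash, honouring the walker's
 * skip/count bookkeeping so user-space dumps can be resumed.
 */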
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned int h;

	if (arg->stop)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

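/* Hook the class and qdisc operations into the tc framework. */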
static const struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.qlen_notify	=	cbq_qlen_notify,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
};

static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.change		=	NULL,
	.dump		=	cbq_dump,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}
static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");