blob: cd81505662b8a3bcc201a18bce0d6c9bd421b650 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
60 qdisc's are divided to two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
134/* The list of all installed queueing disciplines. */
135
136static struct Qdisc_ops *qdisc_base;
137
138/* Register/uregister queueing discipline */
139
/* Register a qdisc type with the packet scheduler core.
 *
 * Validates and normalizes @qops before linking it onto the global
 * qdisc_base list (protected by qdisc_mod_lock):
 *  - missing enqueue/dequeue/peek hooks are filled in from noop_qdisc_ops,
 *    but a custom dequeue without a peek is rejected (-EINVAL);
 *  - class ops, if present, must provide get/put/walk/leaf, and a
 *    tcf_chain hook requires matching bind_tcf/unbind_tcf.
 *
 * Returns 0 on success, -EEXIST if an ops with the same id is already
 * registered, -EINVAL for an inconsistent ops table.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Mandatory class hooks for any classful qdisc. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* A filter chain only makes sense with bind/unbind. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Append at the tail: *qp points at the terminating NULL link. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184int unregister_qdisc(struct Qdisc_ops *qops)
185{
186 struct Qdisc_ops *q, **qp;
187 int err = -ENOENT;
188
189 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 if (q == qops)
192 break;
193 if (q) {
194 *qp = q->next;
195 q->next = NULL;
196 err = 0;
197 }
198 write_unlock(&qdisc_mod_lock);
199 return err;
200}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800201EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700203/* Get default qdisc if not otherwise specified */
204void qdisc_get_default(char *name, size_t len)
205{
206 read_lock(&qdisc_mod_lock);
207 strlcpy(name, default_qdisc_ops->id, len);
208 read_unlock(&qdisc_mod_lock);
209}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
/* Set new default qdisc to use.
 *
 * Requires CAP_NET_ADMIN.  If the named type is not yet registered,
 * the write lock is dropped so request_module() may sleep, then the
 * lookup is retried.  On success the reference on the previous default
 * is released and the new ops (with its module ref from
 * qdisc_lookup_default()) becomes default_qdisc_ops.
 *
 * Returns 0 on success, -EPERM or -ENOENT on failure.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255/* We know handle. Find qdisc among all qdisc's attached to device
256 (root qdisc, all its children, children of children etc.)
257 */
258
Hannes Eder6113b742008-11-28 03:06:46 -0800259static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700260{
261 struct Qdisc *q;
262
263 if (!(root->flags & TCQ_F_BUILTIN) &&
264 root->handle == handle)
265 return root;
266
267 list_for_each_entry(q, &root->list, list) {
268 if (q->handle == handle)
269 return q;
270 }
271 return NULL;
272}
273
/* Link a newly created qdisc onto its device root qdisc's list so that
 * qdisc_lookup() can find it.  The root qdisc itself and ingress
 * qdiscs are deliberately kept off this list.
 */
static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
}
279
/* Counterpart of qdisc_list_add(): unlink @q from its root's list.
 * Root and ingress qdiscs were never added, so the same filter applies.
 */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_del(&q->list);
}
EXPORT_SYMBOL(qdisc_list_del);
286
/* Find a qdisc by handle anywhere on @dev: first under the egress root
 * (dev->qdisc and its children), then, if present, under the ingress
 * queue's sleeping qdisc.  Returns NULL if not found.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
302
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
304{
305 unsigned long cl;
306 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800307 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308
309 if (cops == NULL)
310 return NULL;
311 cl = cops->get(p, classid);
312
313 if (cl == 0)
314 return NULL;
315 leaf = cops->leaf(p, cl);
316 cops->put(p, cl);
317 return leaf;
318}
319
/* Find queueing discipline by name (netlink TCA_KIND attribute).
 * On a match, a module reference is taken; the caller must module_put()
 * the owner when done.  Returns NULL for no match, a NULL @kind, or a
 * module that is unloading.
 */
static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
339
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200340/* The linklayer setting were not transferred from iproute2, in older
341 * versions, and the rate tables lookup systems have been dropped in
342 * the kernel. To keep backward compatible with older iproute2 tc
343 * utils, we detect the linklayer setting by detecting if the rate
344 * table were modified.
345 *
346 * For linklayer ATM table entries, the rate table will be aligned to
347 * 48 bytes, thus some table entries will contain the same value. The
348 * mpu (min packet unit) is also encoded into the old rate table, thus
349 * starting from the mpu, we find low and high table entries for
350 * mapping this cell. If these entries contain the same value, when
351 * the rate tables have been modified for linklayer ATM.
352 *
353 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
354 * and then roundup to the next cell, calc the table entry one below,
355 * and compare.
356 */
/* Heuristically detect the link layer encoded in an old-style rate
 * table (see block comment above): round mpu up to an ATM cell
 * boundary, and if the table entries just below and above that
 * boundary collapse to the same value, the table was built for ATM.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
378
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379static struct qdisc_rate_table *qdisc_rtab_list;
380
/* Look up (or create) a shared rate table matching @r and the raw
 * 1024-byte table in netlink attribute @tab.  Existing tables are
 * reference-counted and reused when both the ratespec and the data
 * match.  For newly built tables, a linklayer that was left
 * TC_LINKLAYER_UNAWARE by old iproute2 is detected from the data.
 * Returns NULL on invalid input or allocation failure.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		/* Push onto the global singly linked list head. */
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410
/* Drop one reference on @tab; when the count reaches zero, unlink it
 * from qdisc_rtab_list and free it.  NULL is tolerated as a no-op.
 */
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700430static LIST_HEAD(qdisc_stab_list);
431static DEFINE_SPINLOCK(qdisc_stab_lock);
432
433static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
434 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
435 [TCA_STAB_DATA] = { .type = NLA_BINARY },
436};
437
/* Parse a TCA_STAB nested attribute into a size table, sharing an
 * existing table when both the sizespec and the slot data match.
 *
 * Validation: TCA_STAB_BASE is mandatory; when the sizespec announces
 * tsize > 0 slots, TCA_STAB_DATA must be present and its length must
 * agree with tsize.
 *
 * Returns the (reference-counted) table or an ERR_PTR.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	/* First pass under the lock: try to reuse an identical table. */
	spin_lock(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	/* Allocate outside the spinlock (GFP_KERNEL may sleep). */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
494
Eric Dumazeta2da5702011-01-20 03:48:19 +0000495static void stab_kfree_rcu(struct rcu_head *head)
496{
497 kfree(container_of(head, struct qdisc_size_table, rcu));
498}
499
/* Drop one reference on size table @tab.  On the last reference the
 * table is unlinked and freed via call_rcu_bh(), since the fast path
 * reads it under rcu_read_lock_bh().  NULL is a no-op.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
515
/* Dump the sizespec of @stab as a nested TCA_STAB attribute into @skb.
 * Only the base sizespec is emitted, not the slot data.
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
532
/* Compute the effective packet length of @skb according to size table
 * @stab and store it in qdisc_skb_cb(skb)->pkt_len.
 *
 * The raw length plus overhead is mapped to a table slot; slots beyond
 * the table are extrapolated linearly from the last entry.  The result
 * is scaled by size_log and clamped to a minimum of 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	/* cell_align may be negative; never index below slot 0. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Out of table: extrapolate from whole-table multiples
		 * plus the remainder slot.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700560
/* One-shot warning that a qdisc appears to be non-work-conserving.
 * TCQ_F_WARN_NONWC is set after the first print so the log is not
 * flooded by repeated callers.
 */
void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
570
/* hrtimer callback of the qdisc watchdog: clear the throttled state
 * and reschedule the root qdisc so dequeueing resumes.  The timer is
 * one-shot; schedule_ns() re-arms it as needed.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	qdisc_unthrottled(wd->qdisc);
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}
581
/* Initialize a watchdog for @qdisc: absolute monotonic hrtimer firing
 * qdisc_watchdog().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
589
/* Arm the watchdog to fire at absolute time @expires (nanoseconds),
 * marking the qdisc throttled meanwhile.  Silently does nothing if the
 * root qdisc is being deactivated, to avoid re-arming a timer during
 * teardown.
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	qdisc_throttled(wd->qdisc);

	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700603
/* Stop a pending watchdog timer and clear the throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
/* Allocate and initialize an array of @n hlist heads.  Small arrays
 * come from kmalloc; larger ones fall back to whole pages so the
 * allocation does not strain the slab with big contiguous requests.
 * Returns NULL on allocation failure.
 */
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}
628
629static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
630{
631 unsigned int size = n * sizeof(struct hlist_head);
632
633 if (size <= PAGE_SIZE)
634 kfree(h);
635 else
636 free_pages((unsigned long)h, get_order(size));
637}
638
/* Double the class hash of @sch when its load factor exceeds 0.75.
 *
 * The new bucket array is allocated before taking sch_tree_lock();
 * entries are then rehashed and the table pointers swapped under the
 * lock, so concurrent readers always see a consistent table.  If the
 * allocation fails the old table simply stays in use.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
674
/* Initialize a class hash with 4 buckets (grown on demand by
 * qdisc_class_hash_grow()).  Returns 0 or -ENOMEM.
 */
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);
688
/* Free the bucket array of @clhash.  Callers are expected to have
 * emptied the table first; entries themselves are not freed here.
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
694
/* Insert class @cl into @clhash, bucketed by its classid. */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
706
/* Remove class @cl from its hash bucket and adjust the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
714
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 *
 * A static rover steps through the major-number space, skipping the
 * reserved TC_H_ROOT value and any handle already in use on @dev.
 * Returns 0 when all 0x8000 candidates are taken.
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		/* Lookup over many qdiscs can be slow; stay preemptible. */
		cond_resched();
	} while	(--i > 0);

	return 0;
}
734
/* Propagate a decrease of @n packets in @sch's queue length up the
 * qdisc hierarchy, so every ancestor's q.qlen and drop counter stay
 * consistent.  Ancestors with a qlen_notify hook are told which class
 * changed so they can e.g. deactivate now-empty classes.
 * The walk stops at the root or at an ingress parent.
 */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0)
		return;
	drops = max_t(int, n, 0);
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no parent qdisc. */
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.drops += drops;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
Tom Goff7316ae82010-03-19 15:40:13 +0000766static void notify_and_destroy(struct net *net, struct sk_buff *skb,
767 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700768 struct Qdisc *old, struct Qdisc *new)
769{
770 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000771 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
David S. Miller4d8863a2008-08-18 21:03:15 -0700773 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700774 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700775}
776
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * With parent == NULL the graft is device-level: either the ingress
 * queue (single queue) or every TX queue gets the new qdisc, sharing
 * one refcount-per-queue.  Otherwise the graft is delegated to the
 * parent's class ops.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		/* Ingress grafts touch exactly one queue, which must exist. */
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Multiqueue-aware qdiscs attach themselves; skip the
		 * per-queue loop below.
		 */
		if (new && new->ops->attach) {
			new->ops->attach(new);
			num_q = 0;
		}

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per additional queue sharing @new. */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			/* Fall back to the builtin noop qdisc on removal. */
			dev->qdisc = new ? : &noop_qdisc;
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
858
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * One lock class key per direction so lockdep can distinguish the TX
 * qdisc locks from the ingress (RX) qdisc lock; the keys are applied
 * via lockdep_set_class() in qdisc_create() below.
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
862
/*
 * Allocate and initialize a new qdisc.
 *
 * Runs under RTNL (which is temporarily dropped and re-acquired around
 * the module autoload below).  @dev_queue is the device queue the qdisc
 * will sit on, @p its parent qdisc (may be NULL), @parent/@handle the
 * identifiers from the netlink request and @tca the parsed TCA_*
 * attributes (kind, options, size table, rate estimator).
 *
 * Returns the new qdisc, or NULL with the error code stored in *errp.
 * -EAGAIN means a scheduler module was loaded and the caller must
 * replay the whole request.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		/* Ingress qdiscs all use the fixed ffff:0000 handle and
		 * get the RX lockdep class.
		 */
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			/* No handle requested: allocate a free one.  A
			 * return of zero means the handle space is
			 * exhausted, reported as -ENOMEM.
			 */
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		/* Optional size table (TCA_STAB), published via RCU. */
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick the lock handed to the estimator: the
			 * root sleeping lock for ordinary non-root
			 * egress qdiscs, the qdisc's own lock for
			 * ingress or mq-rooted ones.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	/* NOTE(review): dev_put() pairs with a device reference taken
	 * when the qdisc was allocated — presumably in qdisc_alloc();
	 * verify against sch_generic.c.
	 */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
991
/*
 * Modify an existing qdisc under RTNL: apply new options through the
 * qdisc's ->change() op, swap in the size table and replace the rate
 * estimator.  Returns 0 or a negative errno; once ->change() has
 * succeeded the remaining steps are applied unconditionally.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Swap in the new size table; stab stays NULL when TCA_STAB was
	 * absent, i.e. a change request without TCA_STAB drops any table
	 * currently attached.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1027
/* State threaded through the class walker by check_loop().  The generic
 * walker must remain the first member so that check_loop_fn() can cast
 * its qdisc_walker pointer back to this structure.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* must be first member (cast target) */
	struct Qdisc *p;		/* qdisc whose grafting is being validated */
	int depth;			/* recursion depth reached so far */
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1035
1036static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1037{
1038 struct check_loop_arg arg;
1039
1040 if (q->ops->cl_ops == NULL)
1041 return 0;
1042
1043 arg.w.stop = arg.w.skip = arg.w.count = 0;
1044 arg.w.fn = check_loop_fn;
1045 arg.depth = depth;
1046 arg.p = p;
1047 q->ops->cl_ops->walk(q, &arg.w);
1048 return arg.w.stop ? -ELOOP : 0;
1049}
1050
1051static int
1052check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1053{
1054 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001055 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1057
1058 leaf = cops->leaf(q, cl);
1059 if (leaf) {
1060 if (leaf == arg->p || arg->depth > 7)
1061 return -ELOOP;
1062 return check_loop(leaf, arg->p, arg->depth + 1);
1063 }
1064 return 0;
1065}
1066
/*
 * Delete/get qdisc.
 *
 * Handles RTM_DELQDISC and RTM_GETQDISC: locate the qdisc addressed by
 * tcm_parent/tcm_handle on the target device, then either graft NULL
 * in its place (delete) or send a notification describing it (get).
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deleting requires admin rights; a plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Parent given: resolve the child attached under it. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also supplied it must match. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent: look the qdisc up directly by handle. */
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* handle 0 denotes a default qdisc; refuse to delete it. */
		if (q->handle == 0)
			return -ENOENT;
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1134
/*
 * Create/change qdisc.
 *
 * Handles RTM_NEWQDISC: depending on the netlink flags and on what is
 * already attached at tcm_parent, this either modifies an existing
 * qdisc in place (qdisc_change) or creates a new one and grafts it
 * (qdisc_create + qdisc_graft).  A scheduler-module autoload inside
 * qdisc_create() is signalled with -EAGAIN and makes us replay the
 * whole request from scratch.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* An explicit handle was requested that does
				 * not match what sits at the parent now.
				 */
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Moving an existing qdisc: make sure this
				 * cannot create a cycle in the hierarchy.
				 */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent choose the queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		/* -EAGAIN: a module was loaded, redo the lookup chain. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1292
/*
 * Fill one RTM_*QDISC netlink message for @q into @skb: the tcmsg
 * header, the kind string, the qdisc's own dump, the size table and the
 * statistics blocks.  Returns skb->len on success, or -1 after trimming
 * the partially-built message back off the skb.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the reference count in qdisc dumps. */
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1346
Eric Dumazet53b0f082010-05-22 20:37:44 +00001347static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1348{
1349 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1350}
1351
/*
 * Build and send the RTM_DELQDISC/RTM_NEWQDISC notification describing
 * the transition from @old to @new under parent @clid.  @oskb is the
 * request that triggered the change (source of the requester's portid
 * and of the echo flag); it may be NULL.
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	/* Built-in qdiscs are invisible to userspace, so skip them. */
	if (old && !tc_qdisc_dump_ignore(old)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	/* Only send when at least one message was actually built. */
	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
1382
/*
 * Dump @root and every qdisc linked on its ->list into a netlink dump
 * in progress.  *q_idx_p counts qdiscs visited so far on this device;
 * the first s_q_idx of them were already sent in a previous dump chunk
 * and are skipped.  Returns 0 on completion or -1 when the skb filled
 * up; the updated index is stored back through q_idx_p either way so
 * the next chunk can resume.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself is entry 0 of this device's qdisc list. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1422
/*
 * Netlink dump callback for RTM_GETQDISC: walk every device in the
 * namespace under RCU and dump its root and ingress qdisc trees.
 * cb->args[0]/args[1] store the device index and per-device qdisc index
 * where a full skb forced us to stop, so the next call resumes there.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	rcu_read_lock();
	idx = 0;
	for_each_netdev_rcu(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Past the restart device: dump its qdiscs from the top. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	rcu_read_unlock();

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1465
1466
1467
1468/************************************************
1469 * Traffic classes manipulation. *
1470 ************************************************/
1471
1472
1473
/*
 * Create/change/delete/get a traffic class (RTM_*TCLASS requests).
 * The comment below documents the tcm_parent/tcm_handle addressing
 * scheme used to locate the owning qdisc and the class itself.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;	/* NOTE: despite the name, holds the parent classid */
	u32 clid;
	u32 qid;
	int err;

	/* Only RTM_GETTCLASS is allowed without admin rights. */
	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class not found: only NEWTCLASS with CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change the class through the qdisc's class ops. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
1604
1605
/* Fill one RTM_*TCLASS netlink message describing class @cl of qdisc @q.
 *
 * The message is addressed with @portid/@seq/@flags/@event and carries
 * the tcmsg header, the TCA_KIND attribute, the class's own attributes
 * (cl_ops->dump) and its statistics (cl_ops->dump_stats).
 *
 * Returns the current skb length on success, or -1 on failure after
 * trimming any partially written message off the skb.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point on failure */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;	/* clear padding so no uninitialized bytes reach userspace */
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Class-specific attributes, if the qdisc's class ops provide them. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	/* Statistics: TCA_STATS2 with TCA_STATS/TCA_XSTATS compatibility. */
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);	/* drop the partial message */
	return -1;
}
1650
Tom Goff7316ae82010-03-19 15:40:13 +00001651static int tclass_notify(struct net *net, struct sk_buff *oskb,
1652 struct nlmsghdr *n, struct Qdisc *q,
1653 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654{
1655 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001656 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657
1658 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1659 if (!skb)
1660 return -ENOBUFS;
1661
Eric W. Biederman15e47302012-09-07 20:12:54 +00001662 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 kfree_skb(skb);
1664 return -EINVAL;
1665 }
1666
Eric W. Biederman15e47302012-09-07 20:12:54 +00001667 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001668 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669}
1670
/* State threaded through the class walker while dumping all classes of
 * a qdisc: the walker itself plus the dump skb and the netlink callback
 * that tracks progress across dump batches.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* must stay first: walker pointer is cast back */
	struct sk_buff *skb;		/* reply message being filled */
	struct netlink_callback *cb;	/* netlink dump state (seq, resume args) */
};
1676
1677static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1678{
1679 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1680
Eric W. Biederman15e47302012-09-07 20:12:54 +00001681 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1683}
1684
/* Dump the classes of a single qdisc into a netlink dump reply.
 *
 * @t_p counts qdiscs visited so far; @s_t is the count at which the
 * previous (truncated) dump stopped, so earlier qdiscs are skipped.
 * cb->args[1] carries the per-qdisc class index to resume from.
 *
 * Returns 0 to continue with the next qdisc, -1 when the skb filled up
 * and the dump must stop (to be resumed by a later callback).
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs that are never dumped, qdiscs already covered by a
	 * previous batch, classless qdiscs, and qdiscs not matching an
	 * explicitly requested parent.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* First qdisc strictly past the resume point: restart its class
	 * walk from index 0 by clearing the remaining cb->args slots.
	 */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped for this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* remember where to resume */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1713
1714static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1715 struct tcmsg *tcm, struct netlink_callback *cb,
1716 int *t_p, int s_t)
1717{
1718 struct Qdisc *q;
1719
1720 if (!root)
1721 return 0;
1722
1723 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1724 return -1;
1725
1726 list_for_each_entry(q, &root->list, list) {
1727 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1728 return -1;
1729 }
1730
1731 return 0;
1732}
1733
/* Netlink dump handler for RTM_GETTCLASS: dump all traffic classes of
 * one device, walking the egress (root) qdisc hierarchy and then the
 * ingress hierarchy.
 *
 * cb->args[0] holds the number of qdiscs fully dumped so far so that a
 * truncated dump can resume where it stopped.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	/* The request must carry at least a full tcmsg header. */
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* resume point left by the previous callback */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;	/* save progress for the next callback */

	dev_put(dev);		/* balance dev_get_by_index() */
	return skb->len;
}
1766
/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for a protocol match and asks
 * the specific classifiers.
 */
Eric Dumazetdc7f9f62011-07-05 23:25:42 +00001771int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001772 struct tcf_result *res)
1773{
1774 __be16 protocol = skb->protocol;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001775 int err;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001776
1777 for (; tp; tp = tp->next) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001778 if (tp->protocol != protocol &&
1779 tp->protocol != htons(ETH_P_ALL))
1780 continue;
1781 err = tp->classify(skb, tp, res);
1782
1783 if (err >= 0) {
Patrick McHardy73ca4912007-07-15 00:02:31 -07001784#ifdef CONFIG_NET_CLS_ACT
1785 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1786 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1787#endif
1788 return err;
1789 }
1790 }
1791 return -1;
1792}
1793EXPORT_SYMBOL(tc_classify_compat);
1794
/* Full classifier entry point: runs tc_classify_compat() and, when
 * actions are enabled, honours TC_ACT_RECLASSIFY verdicts by rerunning
 * the chain from its head, bounded by MAX_REC_LOOP so a packet cannot
 * bounce between reclassifying rules forever.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *otp = tp;	/* chain head, for restarts */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);	/* passes done so far */
		tp = otp;

		/* Too many restarts: log (rate-limited) and drop. */
		if (verd++ >= MAX_REC_LOOP) {
			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
					       tp->q->ops->id,
					       tp->prio & 0xffff,
					       ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		/* Record the extra pass in the skb, then go around again. */
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824
/* Free a single classifier: run its ->destroy() callback first, then
 * drop the module reference pinning its ops, and finally release the
 * tcf_proto itself.
 */
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}
1831
Patrick McHardyff31ab52008-07-01 19:52:38 -07001832void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001833{
1834 struct tcf_proto *tp;
1835
Patrick McHardyff31ab52008-07-01 19:52:38 -07001836 while ((tp = *fl) != NULL) {
1837 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001838 tcf_destroy(tp);
1839 }
1840}
1841EXPORT_SYMBOL(tcf_destroy_chain);
1842
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843#ifdef CONFIG_PROC_FS
/* seq_file show callback for /proc/net/psched.
 *
 * Prints four hex words describing the scheduler clock: nanoseconds per
 * psched microsecond, nanoseconds per internal tick (PSCHED_TICKS2NS),
 * the constant 1000000, and the hrtimer resolution expressed as ticks
 * per second.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	/* Resolution of the monotonic clock backing qdisc timers. */
	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}
1856
/* open() for /proc/net/psched: single-record seq_file, no private data. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1861
/* File operations for the read-only /proc/net/psched entry. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001869
1870static int __net_init psched_net_init(struct net *net)
1871{
1872 struct proc_dir_entry *e;
1873
Gao fengd4beaa62013-02-18 01:34:54 +00001874 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001875 if (e == NULL)
1876 return -ENOMEM;
1877
1878 return 0;
1879}
1880
/* Per-namespace teardown: remove /proc/net/psched for this netns. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
1885#else
/* CONFIG_PROC_FS disabled: no /proc entry, stubs trivially succeed. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894#endif
1895
/* Create/remove /proc/net/psched as network namespaces come and go. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1900
/* Boot-time initialization of the packet scheduler core: per-netns proc
 * support, the built-in qdiscs, and the rtnetlink message handlers for
 * qdisc and traffic-class operations.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs, available without loading any module. */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);

	/* Netlink entry points for qdisc and class management/dumping. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);