blob: 2adda7fa2d390c4bb2aec883cf049df7e7cef9ef [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
60 qdisc's are divided to two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
134/* The list of all installed queueing disciplines. */
135
136static struct Qdisc_ops *qdisc_base;
137
138/* Register/uregister queueing discipline */
139
/*
 * register_qdisc - add a queueing discipline to the global registry
 *
 * Returns 0 on success, -EEXIST if an ops with the same id is already
 * registered, or -EINVAL if mandatory callbacks are missing/inconsistent.
 * A custom ->peek together with a NULL ->dequeue makes no sense, as do
 * class ops without get/put/walk/leaf.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Walk to the tail while checking for a duplicate id. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	/* Fill in missing callbacks with the no-op defaults. */
	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval; /* peek missing but dequeue present */
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* These class callbacks are mandatory for classful qdiscs. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* A classifier chain requires bind/unbind support. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Append at the tail found above. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184int unregister_qdisc(struct Qdisc_ops *qops)
185{
186 struct Qdisc_ops *q, **qp;
187 int err = -ENOENT;
188
189 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 if (q == qops)
192 break;
193 if (q) {
194 *qp = q->next;
195 q->next = NULL;
196 err = 0;
197 }
198 write_unlock(&qdisc_mod_lock);
199 return err;
200}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800201EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700203/* Get default qdisc if not otherwise specified */
204void qdisc_get_default(char *name, size_t len)
205{
206 read_lock(&qdisc_mod_lock);
207 strlcpy(name, default_qdisc_ops->id, len);
208 read_unlock(&qdisc_mod_lock);
209}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default: release the old default's module ref;
		 * the ref taken by qdisc_lookup_default() is kept by the
		 * new default.
		 */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255/* We know handle. Find qdisc among all qdisc's attached to device
256 (root qdisc, all its children, children of children etc.)
257 */
258
Hannes Eder6113b742008-11-28 03:06:46 -0800259static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700260{
261 struct Qdisc *q;
262
263 if (!(root->flags & TCQ_F_BUILTIN) &&
264 root->handle == handle)
265 return root;
266
267 list_for_each_entry(q, &root->list, list) {
268 if (q->handle == handle)
269 return q;
270 }
271 return NULL;
272}
273
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700274static void qdisc_list_add(struct Qdisc *q)
275{
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800276 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
Patrick McHardyaf356af2009-09-04 06:41:18 +0000277 list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700278}
279
/* Remove @q from the per-device list; mirrors qdisc_list_add(). */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_del(&q->list);
}
EXPORT_SYMBOL(qdisc_list_del);
286
David S. Milleread81cc2008-07-17 00:50:32 -0700287struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800288{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700289 struct Qdisc *q;
290
Patrick McHardyaf356af2009-09-04 06:41:18 +0000291 q = qdisc_match_from_root(dev->qdisc, handle);
292 if (q)
293 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700294
Eric Dumazet24824a02010-10-02 06:11:55 +0000295 if (dev_ingress_queue(dev))
296 q = qdisc_match_from_root(
297 dev_ingress_queue(dev)->qdisc_sleeping,
298 handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800299out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700300 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800301}
302
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
304{
305 unsigned long cl;
306 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800307 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308
309 if (cops == NULL)
310 return NULL;
311 cl = cops->get(p, classid);
312
313 if (cl == 0)
314 return NULL;
315 leaf = cops->leaf(p, cl);
316 cops->put(p, cl);
317 return leaf;
318}
319
320/* Find queueing discipline by name */
321
Patrick McHardy1e904742008-01-22 22:11:17 -0800322static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323{
324 struct Qdisc_ops *q = NULL;
325
326 if (kind) {
327 read_lock(&qdisc_mod_lock);
328 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800329 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 if (!try_module_get(q->owner))
331 q = NULL;
332 break;
333 }
334 }
335 read_unlock(&qdisc_mod_lock);
336 }
337 return q;
338}
339
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200340/* The linklayer setting were not transferred from iproute2, in older
341 * versions, and the rate tables lookup systems have been dropped in
342 * the kernel. To keep backward compatible with older iproute2 tc
343 * utils, we detect the linklayer setting by detecting if the rate
344 * table were modified.
345 *
346 * For linklayer ATM table entries, the rate table will be aligned to
347 * 48 bytes, thus some table entries will contain the same value. The
348 * mpu (min packet unit) is also encoded into the old rate table, thus
349 * starting from the mpu, we find low and high table entries for
350 * mapping this cell. If these entries contain the same value, when
351 * the rate tables have been modified for linklayer ATM.
352 *
353 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
354 * and then roundup to the next cell, calc the table entry one below,
355 * and compare.
356 */
357static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
358{
359 int low = roundup(r->mpu, 48);
360 int high = roundup(low+1, 48);
361 int cell_low = low >> r->cell_log;
362 int cell_high = (high >> r->cell_log) - 1;
363
364 /* rtab is too inaccurate at rates > 100Mbit/s */
365 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
366 pr_debug("TC linklayer: Giving up ATM detection\n");
367 return TC_LINKLAYER_ETHERNET;
368 }
369
370 if ((cell_high > cell_low) && (cell_high < 256)
371 && (rtab[cell_low] == rtab[cell_high])) {
372 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
373 cell_low, cell_high, rtab[cell_high]);
374 return TC_LINKLAYER_ATM;
375 }
376 return TC_LINKLAYER_ETHERNET;
377}
378
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379static struct qdisc_rate_table *qdisc_rtab_list;
380
Patrick McHardy1e904742008-01-22 22:11:17 -0800381struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382{
383 struct qdisc_rate_table *rtab;
384
Eric Dumazet40edeff2013-06-02 11:15:55 +0000385 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
386 nla_len(tab) != TC_RTAB_SIZE)
387 return NULL;
388
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
Eric Dumazet40edeff2013-06-02 11:15:55 +0000390 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
391 !memcmp(&rtab->data, nla_data(tab), 1024)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 rtab->refcnt++;
393 return rtab;
394 }
395 }
396
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
398 if (rtab) {
399 rtab->rate = *r;
400 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800401 memcpy(rtab->data, nla_data(tab), 1024);
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200402 if (r->linklayer == TC_LINKLAYER_UNAWARE)
403 r->linklayer = __detect_linklayer(r, rtab->data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 rtab->next = qdisc_rtab_list;
405 qdisc_rtab_list = rtab;
406 }
407 return rtab;
408}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800409EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410
411void qdisc_put_rtab(struct qdisc_rate_table *tab)
412{
413 struct qdisc_rate_table *rtab, **rtabp;
414
415 if (!tab || --tab->refcnt)
416 return;
417
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000418 for (rtabp = &qdisc_rtab_list;
419 (rtab = *rtabp) != NULL;
420 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 if (rtab == tab) {
422 *rtabp = rtab->next;
423 kfree(rtab);
424 return;
425 }
426 }
427}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800428EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700430static LIST_HEAD(qdisc_stab_list);
431static DEFINE_SPINLOCK(qdisc_stab_lock);
432
433static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
434 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
435 [TCA_STAB_DATA] = { .type = NLA_BINARY },
436};
437
/*
 * Parse a TCA_STAB attribute and return a refcounted size table,
 * sharing an identical existing table when possible.
 * Returns ERR_PTR(-EINVAL) on malformed input, ERR_PTR(-ENOMEM) on
 * allocation failure.  Release with qdisc_put_stab().
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero declared table size requires actual table data. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* Declared and actual sizes must agree. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Reuse an existing table with identical spec and data. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	/* Drop the spinlock for the GFP_KERNEL allocation.  NOTE(review):
	 * another thread could insert an equal table meanwhile, yielding a
	 * harmless duplicate entry — confirm this is acceptable.
	 */
	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
494
Eric Dumazeta2da5702011-01-20 03:48:19 +0000495static void stab_kfree_rcu(struct rcu_head *head)
496{
497 kfree(container_of(head, struct qdisc_size_table, rcu));
498}
499
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700500void qdisc_put_stab(struct qdisc_size_table *tab)
501{
502 if (!tab)
503 return;
504
David S. Millerf3b96052008-08-18 22:33:05 -0700505 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700506
507 if (--tab->refcnt == 0) {
508 list_del(&tab->list);
Eric Dumazeta2da5702011-01-20 03:48:19 +0000509 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700510 }
511
David S. Millerf3b96052008-08-18 22:33:05 -0700512 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700513}
514EXPORT_SYMBOL(qdisc_put_stab);
515
516static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
517{
518 struct nlattr *nest;
519
520 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800521 if (nest == NULL)
522 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400523 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
524 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700525 nla_nest_end(skb, nest);
526
527 return skb->len;
528
529nla_put_failure:
530 return -1;
531}
532
Eric Dumazeta2da5702011-01-20 03:48:19 +0000533void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700534{
535 int pkt_len, slot;
536
537 pkt_len = skb->len + stab->szopts.overhead;
538 if (unlikely(!stab->szopts.tsize))
539 goto out;
540
541 slot = pkt_len + stab->szopts.cell_align;
542 if (unlikely(slot < 0))
543 slot = 0;
544
545 slot >>= stab->szopts.cell_log;
546 if (likely(slot < stab->szopts.tsize))
547 pkt_len = stab->data[slot];
548 else
549 pkt_len = stab->data[stab->szopts.tsize - 1] *
550 (slot / stab->szopts.tsize) +
551 stab->data[slot % stab->szopts.tsize];
552
553 pkt_len <<= stab->szopts.size_log;
554out:
555 if (unlikely(pkt_len < 1))
556 pkt_len = 1;
557 qdisc_skb_cb(skb)->pkt_len = pkt_len;
558}
Eric Dumazeta2da5702011-01-20 03:48:19 +0000559EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700560
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800561void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
562{
563 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000564 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
565 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800566 qdisc->flags |= TCQ_F_WARN_NONWC;
567 }
568}
569EXPORT_SYMBOL(qdisc_warn_nonwc);
570
Patrick McHardy41794772007-03-16 01:19:15 -0700571static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
572{
573 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700574 timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700575
Eric Dumazetfd245a42011-01-20 05:27:16 +0000576 qdisc_unthrottled(wd->qdisc);
David S. Miller8608db02008-08-18 20:51:18 -0700577 __netif_schedule(qdisc_root(wd->qdisc));
Stephen Hemminger19365022007-03-22 12:18:35 -0700578
Patrick McHardy41794772007-03-16 01:19:15 -0700579 return HRTIMER_NORESTART;
580}
581
/* Initialize a watchdog whose timer wakes @qdisc (absolute monotonic time). */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
589
Jiri Pirko34c5d292013-02-12 00:12:04 +0000590void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
Patrick McHardy41794772007-03-16 01:19:15 -0700591{
Jarek Poplawski2540e052008-08-21 05:11:14 -0700592 if (test_bit(__QDISC_STATE_DEACTIVATED,
593 &qdisc_root_sleeping(wd->qdisc)->state))
594 return;
595
Eric Dumazetfd245a42011-01-20 05:27:16 +0000596 qdisc_throttled(wd->qdisc);
Eric Dumazet46baac32012-10-20 00:40:51 +0000597
598 hrtimer_start(&wd->timer,
Jiri Pirko34c5d292013-02-12 00:12:04 +0000599 ns_to_ktime(expires),
Eric Dumazet46baac32012-10-20 00:40:51 +0000600 HRTIMER_MODE_ABS);
Patrick McHardy41794772007-03-16 01:19:15 -0700601}
Jiri Pirko34c5d292013-02-12 00:12:04 +0000602EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700603
/* Cancel a pending watchdog and clear the qdisc's throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
Adrian Bunka94f7792008-07-22 14:20:11 -0700611static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700612{
613 unsigned int size = n * sizeof(struct hlist_head), i;
614 struct hlist_head *h;
615
616 if (size <= PAGE_SIZE)
617 h = kmalloc(size, GFP_KERNEL);
618 else
619 h = (struct hlist_head *)
620 __get_free_pages(GFP_KERNEL, get_order(size));
621
622 if (h != NULL) {
623 for (i = 0; i < n; i++)
624 INIT_HLIST_HEAD(&h[i]);
625 }
626 return h;
627}
628
629static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
630{
631 unsigned int size = n * sizeof(struct hlist_head);
632
633 if (size <= PAGE_SIZE)
634 kfree(h);
635 else
636 free_pages((unsigned long)h, get_order(size));
637}
638
/*
 * Double the class hash when its load factor exceeds 0.75.  The rehash
 * into the new buckets happens under the qdisc tree lock; only the old
 * bucket array is freed afterwards — the class entries themselves are
 * relinked, not copied.  Allocation failure silently keeps the old table.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
674
675int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
676{
677 unsigned int size = 4;
678
679 clhash->hash = qdisc_class_hash_alloc(size);
680 if (clhash->hash == NULL)
681 return -ENOMEM;
682 clhash->hashsize = size;
683 clhash->hashmask = size - 1;
684 clhash->hashelems = 0;
685 return 0;
686}
687EXPORT_SYMBOL(qdisc_class_hash_init);
688
/* Release the bucket array; the hash must already be empty of classes. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
694
/* Insert @cl into the class hash, keyed by its classid.
 * NOTE(review): callers appear responsible for serializing against
 * concurrent lookups (e.g. via the qdisc tree lock) — verify at call sites.
 */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
706
/* Unlink @cl from the class hash; mirrors qdisc_class_hash_insert(). */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
714
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000715/* Allocate an unique handle from space managed by kernel
716 * Possible range is [8000-FFFF]:0000 (0x8000 values)
717 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718static u32 qdisc_alloc_handle(struct net_device *dev)
719{
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000720 int i = 0x8000;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
722
723 do {
724 autohandle += TC_H_MAKE(0x10000U, 0);
725 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
726 autohandle = TC_H_MAKE(0x80000000U, 0);
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000727 if (!qdisc_lookup(dev, autohandle))
728 return autohandle;
729 cond_resched();
730 } while (--i > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000732 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733}
734
Patrick McHardy43effa12006-11-29 17:35:48 -0800735void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
736{
Eric Dumazet20fea082007-11-14 01:44:41 -0800737 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800738 unsigned long cl;
739 u32 parentid;
740
741 if (n == 0)
742 return;
743 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700744 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
745 return;
746
David S. Miller5ce2d482008-07-08 17:06:30 -0700747 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700748 if (sch == NULL) {
749 WARN_ON(parentid != TC_H_ROOT);
750 return;
751 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800752 cops = sch->ops->cl_ops;
753 if (cops->qlen_notify) {
754 cl = cops->get(sch, parentid);
755 cops->qlen_notify(sch, cl);
756 cops->put(sch, cl);
757 }
758 sch->q.qlen -= n;
759 }
760}
761EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
Tom Goff7316ae82010-03-19 15:40:13 +0000763static void notify_and_destroy(struct net *net, struct sk_buff *skb,
764 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700765 struct Qdisc *old, struct Qdisc *new)
766{
767 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000768 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
David S. Miller4d8863a2008-08-18 21:03:15 -0700770 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700771 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700772}
773
774/* Graft qdisc "new" to class "classid" of qdisc "parent" or
775 * to device "dev".
776 *
777 * When appropriate send a netlink notification using 'skb'
778 * and "n".
779 *
780 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 */
782
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Grafting at device level: either the egress root across
		 * all TX queues, or the single ingress queue.
		 */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Quiesce the device while swapping qdiscs. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Multiqueue-aware qdiscs attach themselves to the queues;
		 * skip the per-queue grafting loop below.
		 */
		if (new && new->ops->attach) {
			new->ops->attach(new);
			num_q = 0;
		}

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* Each additional TX queue takes its own reference
			 * on the shared new qdisc.
			 */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

		if (!ingress) {
			/* Notify about (and destroy) the old device root,
			 * then install the new one (noop if NULL).
			 */
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Grafting into a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
855
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * Separate lock classes keep lockdep from conflating the rx (ingress) and
 * tx qdisc locks; qdisc_create() applies these via lockdep_set_class().
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
859
/*
 * Allocate and initialize a new qdisc.
 *
 * @dev:       device the qdisc is created for
 * @dev_queue: tx (or ingress) queue backing the qdisc
 * @p:         enclosing parent qdisc, or NULL (used for the MQROOT check)
 * @parent:    parent handle recorded in sch->parent
 * @handle:    requested handle; TC_H_INGRESS selects the ingress path,
 *             0 requests allocation of a fresh handle
 * @tca:       netlink attributes (TCA_KIND, TCA_OPTIONS, TCA_STAB, TCA_RATE)
 * @errp:      receives a negative errno when NULL is returned
 *
 * Returns the new qdisc, or NULL with *errp set.  -EAGAIN tells the
 * caller to replay the whole request (module autoload dropped RTNL).
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	/* Unknown kind: try to autoload the module implementing it. */
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	/* qdisc_alloc() takes the module and device references. */
	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			/* Caller did not pick a handle: allocate one. */
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		/* Optional size table used for packet-length scaling. */
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick the lock the estimator timer must take:
			 * the sleeping root lock, except for ingress,
			 * MQ children and the root itself.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	/* Undo the device reference and free the raw allocation. */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
988
/* Apply a change request to an existing qdisc: forward TCA_OPTIONS to the
 * qdisc's ->change() op, swap in a new size table (RCU publish of the new
 * one before releasing the old), and replace the rate estimator.
 * Returns 0 on success or a negative errno.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (or NULL) before dropping the old one. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
					    qdisc_root_sleeping_lock(sch),
					    tca[TCA_RATE]);
	}
out:
	return 0;
}
1024
/* Walker state for the qdisc loop check. */
struct check_loop_arg {
	struct qdisc_walker w;	/* must be first: check_loop_fn() casts back */
	struct Qdisc *p;	/* qdisc we must not reach again */
	int depth;		/* current recursion depth */
};
1030
1031static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1032
1033static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1034{
1035 struct check_loop_arg arg;
1036
1037 if (q->ops->cl_ops == NULL)
1038 return 0;
1039
1040 arg.w.stop = arg.w.skip = arg.w.count = 0;
1041 arg.w.fn = check_loop_fn;
1042 arg.depth = depth;
1043 arg.p = p;
1044 q->ops->cl_ops->walk(q, &arg.w);
1045 return arg.w.stop ? -ELOOP : 0;
1046}
1047
1048static int
1049check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1050{
1051 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001052 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1054
1055 leaf = cops->leaf(q, cl);
1056 if (leaf) {
1057 if (leaf == arg->p || arg->depth > 7)
1058 return -ELOOP;
1059 return check_loop(leaf, arg->p, arg->depth + 1);
1060 }
1061 return 0;
1062}
1063
/*
 * Delete/get qdisc.
 *
 * Handles RTM_DELQDISC and RTM_GETQDISC netlink requests: locate the
 * qdisc either by parent classid (tcm_parent) or directly by handle
 * (tcm_handle), then either graft it away (delete) or notify the
 * requester (get).
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Resolve via the parent: root, ingress, or a class leaf. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* A handle, when given, must match the qdisc we found. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent given: look the qdisc up by handle directly. */
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL in q's place removes (and destroys) it. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1131
/*
 * Create/change qdisc.
 *
 * Handles RTM_NEWQDISC: depending on the existing tree and the
 * NLM_F_CREATE/REPLACE/EXCL flags this either changes an existing
 * qdisc in place or creates a new one and grafts it.  A module
 * autoload inside qdisc_create() is signalled by -EAGAIN, in which
 * case the whole request is replayed from scratch.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		/* Resolve the current qdisc at the requested position. */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Re-grafting q under p must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent choose the backing tx queue. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1289
/* Fill one RTM_*QDISC netlink message describing @q into @skb.
 * Returns skb->len on success or -1 on failure, in which case the
 * partially written message is trimmed away again.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1343
Eric Dumazet53b0f082010-05-22 20:37:44 +00001344static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1345{
1346 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1347}
1348
Tom Goff7316ae82010-03-19 15:40:13 +00001349static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1350 struct nlmsghdr *n, u32 clid,
1351 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352{
1353 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001354 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355
1356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1357 if (!skb)
1358 return -ENOBUFS;
1359
Eric Dumazet53b0f082010-05-22 20:37:44 +00001360 if (old && !tc_qdisc_dump_ignore(old)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001361 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001362 0, RTM_DELQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 goto err_out;
1364 }
Eric Dumazet53b0f082010-05-22 20:37:44 +00001365 if (new && !tc_qdisc_dump_ignore(new)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001366 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001367 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 goto err_out;
1369 }
1370
1371 if (skb->len)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001372 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001373 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
1375err_out:
1376 kfree_skb(skb);
1377 return -EINVAL;
1378}
1379
/* Dump @root and every qdisc on its ->list into @skb, skipping the first
 * @s_q_idx entries (netlink dump resume point).  *q_idx_p is advanced to
 * the index reached; returns 0 on success, -1 when the skb filled up.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself is entry 0; the list holds the rest. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1419
/* Netlink dump callback for RTM_GETQDISC: iterate all devices in the
 * namespace under RCU, dumping each device's egress tree and, when
 * present, its ingress qdisc.  Resume state lives in cb->args[0]
 * (device index) and cb->args[1] (qdisc index within the device).
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	rcu_read_lock();
	idx = 0;
	for_each_netdev_rcu(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Only the device we stopped on keeps its qdisc offset. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	rcu_read_unlock();

	/* Record where to resume on the next dump invocation. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1462
1463
1464
1465/************************************************
1466 * Traffic classes manipulation. *
1467 ************************************************/
1468
1469
1470
/* Handle RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS: resolve the
 * owning qdisc from the parent/handle encoding below, then dispatch to
 * the qdisc's class ops (get/change/delete) and notify userspace.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Unknown class: only NEW with CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create a new class or change the existing one. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the class reference taken by cops->get(). */
	if (cl)
		cops->put(q, cl);

	return err;
}
1601
1602
/* Fill one traffic-class netlink message (RTM_NEWTCLASS/RTM_DELTCLASS)
 * for class @cl of qdisc @q into @skb.
 *
 * Returns skb->len on success, or -1 when the skb ran out of room —
 * in that case the partially written message is trimmed off again so
 * the caller can retry in a later dump round.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	/* Remember the tail so a failed fill rolls back in a single trim. */
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	/* Clear padding explicitly so no kernel memory leaks to userspace. */
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Let the qdisc add class-specific attributes (may set tcm_handle). */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1647
Tom Goff7316ae82010-03-19 15:40:13 +00001648static int tclass_notify(struct net *net, struct sk_buff *oskb,
1649 struct nlmsghdr *n, struct Qdisc *q,
1650 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651{
1652 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001653 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654
1655 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1656 if (!skb)
1657 return -ENOBUFS;
1658
Eric W. Biederman15e47302012-09-07 20:12:54 +00001659 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 kfree_skb(skb);
1661 return -EINVAL;
1662 }
1663
Eric W. Biederman15e47302012-09-07 20:12:54 +00001664 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001665 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666}
1667
/* Context threaded through a qdisc's class walker while dumping
 * classes to netlink.  The embedded qdisc_walker must stay the first
 * member so the walk callback can cast the walker pointer back to
 * this struct.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* generic walk state (skip/count/stop) */
	struct sk_buff *skb;		/* dump message under construction */
	struct netlink_callback *cb;	/* netlink dump context */
};
1673
1674static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1675{
1676 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1677
Eric W. Biederman15e47302012-09-07 20:12:54 +00001678 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1680}
1681
/* Dump the classes of a single qdisc into the netlink dump @skb.
 *
 * *t_p counts qdiscs visited so far; s_t is the count at which a
 * previous, truncated dump stopped, so earlier qdiscs are skipped on
 * resume.  cb->args[1] carries the per-qdisc class skip count between
 * callback invocations.  Returns 0 to continue, -1 when the skb
 * filled up mid-walk.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip ignored qdiscs, already-dumped ones, classless qdiscs,
	 * and qdiscs not matching the requested tcm_parent major handle.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* First time past the resume point: reset per-qdisc dump state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped for this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* remember progress for the next round */
	if (arg.w.stop)
		return -1;	/* skb full; resume from cb->args later */
	(*t_p)++;
	return 0;
}
1710
1711static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1712 struct tcmsg *tcm, struct netlink_callback *cb,
1713 int *t_p, int s_t)
1714{
1715 struct Qdisc *q;
1716
1717 if (!root)
1718 return 0;
1719
1720 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1721 return -1;
1722
1723 list_for_each_entry(q, &root->list, list) {
1724 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1725 return -1;
1726 }
1727
1728 return 0;
1729}
1730
/* Netlink dump handler for RTM_GETTCLASS: walk the classes of every
 * qdisc on the device named in the request and emit one RTM_NEWTCLASS
 * message per class.  cb->args[0] holds how many qdiscs were already
 * dumped so a resumed dump can skip ahead.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	/* Request too short to contain a tcmsg: nothing to dump. */
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* resume point from the previous callback */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	/* The ingress qdisc, if present, hangs off its own queue. */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);	/* balances dev_get_by_index() above */
	return skb->len;
}
1763
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Returns the first filter verdict >= 0, or -1 when no filter in the
 * chain matched the packet.
 */
int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err;

	for (; tp; tp = tp->next) {
		/* Skip filters bound to another protocol unless they
		 * match everything (ETH_P_ALL).
		 */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;
		err = tp->classify(skb, tp, res);

		if (err >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			/* Clear the verdict state unless reclassifying. */
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;	/* first matching filter wins */
		}
	}
	return -1;	/* no match */
}
EXPORT_SYMBOL(tc_classify_compat);
1791
/* Classify @skb against filter chain @tp.  With CONFIG_NET_CLS_ACT,
 * a TC_ACT_RECLASSIFY verdict restarts classification from the head
 * of the chain, bounded by MAX_REC_LOOP to break rule loops.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *otp = tp;	/* chain head, for restarts */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		/* tc_verd carries the reclassify round counter. */
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/* Misconfigured rules would loop forever; drop. */
			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
					       tp->q->ops->id,
					       tp->prio & 0xffff,
					       ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821
Patrick McHardya48b5a62007-03-23 11:29:43 -07001822void tcf_destroy(struct tcf_proto *tp)
1823{
1824 tp->ops->destroy(tp);
1825 module_put(tp->ops->owner);
1826 kfree(tp);
1827}
1828
Patrick McHardyff31ab52008-07-01 19:52:38 -07001829void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001830{
1831 struct tcf_proto *tp;
1832
Patrick McHardyff31ab52008-07-01 19:52:38 -07001833 while ((tp = *fl) != NULL) {
1834 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001835 tcf_destroy(tp);
1836 }
1837}
1838EXPORT_SYMBOL(tcf_destroy_chain);
1839
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840#ifdef CONFIG_PROC_FS
/* seq_file show handler for /proc/net/psched: four hex fields —
 * nanoseconds per pscheduler usec, nanoseconds per tick, a constant
 * 1000000, and a rate derived from the CLOCK_MONOTONIC hrtimer
 * resolution.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}
1853
/* open() handler for /proc/net/psched: single-shot seq_file. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1858
/* File operations for the read-only /proc/net/psched entry. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001866
1867static int __net_init psched_net_init(struct net *net)
1868{
1869 struct proc_dir_entry *e;
1870
Gao fengd4beaa62013-02-18 01:34:54 +00001871 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001872 if (e == NULL)
1873 return -ENOMEM;
1874
1875 return 0;
1876}
1877
/* Per-netns teardown: remove /proc/net/psched for this namespace. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
1882#else
/* CONFIG_PROC_FS disabled: no proc entry to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
1887
/* CONFIG_PROC_FS disabled: no proc entry to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891#endif
1892
/* Create/remove /proc/net/psched as network namespaces come and go. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1897
/* Subsystem init: register per-netns proc handling, the built-in
 * qdiscs, and the qdisc/class rtnetlink message handlers.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);

	/* GET handlers double as delete/get entry points; only the GET
	 * message types also install a dump callback.
	 */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);