blob: ddf047df5361b8c0d05fead97ee1b5ccdeee4113 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
 60   qdiscs are divided into two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
134/* The list of all installed queueing disciplines. */
135
136static struct Qdisc_ops *qdisc_base;
137
Zhi Yong Wu21eb2182014-01-01 04:34:51 +0800138/* Register/unregister queueing discipline */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
/* Register a queueing discipline's ops with the scheduler core.
 * Returns 0 on success, -EEXIST if an ops with the same id is already
 * registered, or -EINVAL if the ops table is inconsistent.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Walk to the tail of the list, rejecting a duplicate id on the way. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	/* Fill in defaults for optional callbacks. */
	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		/* A custom dequeue without its own peek is rejected. */
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must provide the basic class operations. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* A filter chain requires bind/unbind support as well. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Append at the tail; qp points at the final next pointer. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800182EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184int unregister_qdisc(struct Qdisc_ops *qops)
185{
186 struct Qdisc_ops *q, **qp;
187 int err = -ENOENT;
188
189 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 if (q == qops)
192 break;
193 if (q) {
194 *qp = q->next;
195 q->next = NULL;
196 err = 0;
197 }
198 write_unlock(&qdisc_mod_lock);
199 return err;
200}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800201EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
/* Get default qdisc if not otherwise specified: copy its id into @name
 * (at most @len bytes, NUL-terminated).  The read lock keeps
 * default_qdisc_ops stable while the id string is copied.
 */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
/* Set new default qdisc to use.  Requires CAP_NET_ADMIN.  If the named
 * qdisc is not yet registered, its module ("sch_<name>") is requested
 * and the lookup retried.  Returns 0 on success, -EPERM without the
 * capability, or -ENOENT if no such qdisc could be found.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module
		 * (request_module() may sleep, so it cannot run under
		 * the write lock).
		 */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default; qdisc_lookup_default() already took a
		 * module ref on @ops, so only the old owner is released.
		 */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255/* We know handle. Find qdisc among all qdisc's attached to device
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800256 * (root qdisc, all its children, children of children etc.)
257 * Note: caller either uses rtnl or rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 */
259
Hannes Eder6113b742008-11-28 03:06:46 -0800260static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700261{
262 struct Qdisc *q;
263
264 if (!(root->flags & TCQ_F_BUILTIN) &&
265 root->handle == handle)
266 return root;
267
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800268 list_for_each_entry_rcu(q, &root->list, list) {
David S. Miller8123b422008-08-08 23:23:39 -0700269 if (q->handle == handle)
270 return q;
271 }
272 return NULL;
273}
274
/* Link a qdisc into its root qdisc's list so qdisc_lookup() can find it.
 * Root qdiscs themselves and ingress qdiscs are not listed.
 * Writers must hold RTNL; readers walk the list under RCU.
 */
void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		struct Qdisc *root = qdisc_dev(q)->qdisc;

		/* The device should have a real root qdisc by now. */
		WARN_ON_ONCE(root == &noop_qdisc);
		ASSERT_RTNL();
		list_add_tail_rcu(&q->list, &root->list);
	}
}
Eric Dumazet95dc1922013-12-05 11:12:02 -0800285EXPORT_SYMBOL(qdisc_list_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700286
287void qdisc_list_del(struct Qdisc *q)
288{
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800289 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
290 ASSERT_RTNL();
291 list_del_rcu(&q->list);
292 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700293}
294EXPORT_SYMBOL(qdisc_list_del);
295
David S. Milleread81cc2008-07-17 00:50:32 -0700296struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800297{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700298 struct Qdisc *q;
299
Patrick McHardyaf356af2009-09-04 06:41:18 +0000300 q = qdisc_match_from_root(dev->qdisc, handle);
301 if (q)
302 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700303
Eric Dumazet24824a02010-10-02 06:11:55 +0000304 if (dev_ingress_queue(dev))
305 q = qdisc_match_from_root(
306 dev_ingress_queue(dev)->qdisc_sleeping,
307 handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800308out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700309 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800310}
311
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
313{
314 unsigned long cl;
315 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800316 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317
318 if (cops == NULL)
319 return NULL;
320 cl = cops->get(p, classid);
321
322 if (cl == 0)
323 return NULL;
324 leaf = cops->leaf(p, cl);
325 cops->put(p, cl);
326 return leaf;
327}
328
329/* Find queueing discipline by name */
330
Patrick McHardy1e904742008-01-22 22:11:17 -0800331static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
333 struct Qdisc_ops *q = NULL;
334
335 if (kind) {
336 read_lock(&qdisc_mod_lock);
337 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800338 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 if (!try_module_get(q->owner))
340 q = NULL;
341 break;
342 }
343 }
344 read_unlock(&qdisc_mod_lock);
345 }
346 return q;
347}
348
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200349/* The linklayer setting were not transferred from iproute2, in older
350 * versions, and the rate tables lookup systems have been dropped in
351 * the kernel. To keep backward compatible with older iproute2 tc
352 * utils, we detect the linklayer setting by detecting if the rate
353 * table were modified.
354 *
355 * For linklayer ATM table entries, the rate table will be aligned to
356 * 48 bytes, thus some table entries will contain the same value. The
357 * mpu (min packet unit) is also encoded into the old rate table, thus
358 * starting from the mpu, we find low and high table entries for
359 * mapping this cell. If these entries contain the same value, when
360 * the rate tables have been modified for linklayer ATM.
361 *
362 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
363 * and then roundup to the next cell, calc the table entry one below,
364 * and compare.
365 */
366static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
367{
368 int low = roundup(r->mpu, 48);
369 int high = roundup(low+1, 48);
370 int cell_low = low >> r->cell_log;
371 int cell_high = (high >> r->cell_log) - 1;
372
373 /* rtab is too inaccurate at rates > 100Mbit/s */
374 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
375 pr_debug("TC linklayer: Giving up ATM detection\n");
376 return TC_LINKLAYER_ETHERNET;
377 }
378
379 if ((cell_high > cell_low) && (cell_high < 256)
380 && (rtab[cell_low] == rtab[cell_high])) {
381 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
382 cell_low, cell_high, rtab[cell_high]);
383 return TC_LINKLAYER_ATM;
384 }
385 return TC_LINKLAYER_ETHERNET;
386}
387
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388static struct qdisc_rate_table *qdisc_rtab_list;
389
/* Look up (or create) a rate table matching spec @r and the 1024-byte
 * table in netlink attribute @tab.  Existing entries are shared via a
 * reference count; qdisc_put_rtab() releases them.
 * Returns NULL on invalid input or allocation failure.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	/* Reject missing/undersized tables and nonsensical rate specs. */
	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	/* Reuse an existing table when both the spec and the data match. */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Older iproute2 does not set linklayer; detect ATM from
		 * the shape of the table (see __detect_linklayer()).
		 */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800418EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420void qdisc_put_rtab(struct qdisc_rate_table *tab)
421{
422 struct qdisc_rate_table *rtab, **rtabp;
423
424 if (!tab || --tab->refcnt)
425 return;
426
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000427 for (rtabp = &qdisc_rtab_list;
428 (rtab = *rtabp) != NULL;
429 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 if (rtab == tab) {
431 *rtabp = rtab->next;
432 kfree(rtab);
433 return;
434 }
435 }
436}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800437EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700439static LIST_HEAD(qdisc_stab_list);
440static DEFINE_SPINLOCK(qdisc_stab_lock);
441
442static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
443 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
444 [TCA_STAB_DATA] = { .type = NLA_BINARY },
445};
446
/* Parse a TCA_STAB netlink attribute and return a (possibly shared)
 * size table.  Matching tables are refcounted and reused; otherwise a
 * new one is allocated and appended to qdisc_stab_list.
 * Returns an ERR_PTR() on failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		/* A non-empty table requires the data attribute. */
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The declared size must match the data actually supplied. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Share an existing table when spec and data both match. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	/* Drop the spinlock before sleeping in kmalloc(GFP_KERNEL).
	 * NOTE(review): two concurrent callers may thus both allocate an
	 * identical table; that only wastes a little memory.
	 */
	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
503
/* RCU callback: free a size table once all readers are done with it. */
static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}
508
/* Drop one reference on a size table; when it hits zero, unlink the
 * table and free it via RCU (readers may still be traversing it).
 * NULL is accepted and ignored.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		/* Defer the kfree past the RCU grace period. */
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
523EXPORT_SYMBOL(qdisc_put_stab);
524
525static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
526{
527 struct nlattr *nest;
528
529 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800530 if (nest == NULL)
531 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400532 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
533 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700534 nla_nest_end(skb, nest);
535
536 return skb->len;
537
538nla_put_failure:
539 return -1;
540}
541
/* Recompute skb's scheduling length from size table @stab: add the
 * configured overhead, align and index into the table, and scale by
 * size_log.  Indices beyond the table extrapolate linearly from the
 * last slot.  The result (at least 1) is stored in
 * qdisc_skb_cb(skb)->pkt_len.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;		/* no table: overhead only */

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;		/* negative align can underflow */

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Out of range: extrapolate from the table's last cell. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	/* Never report a non-positive length to the scheduler. */
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
Eric Dumazeta2da5702011-01-20 03:48:19 +0000568EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700569
Florian Westphal6e765a02014-06-11 20:35:18 +0200570void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800571{
572 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000573 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
574 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800575 qdisc->flags |= TCQ_F_WARN_NONWC;
576 }
577}
578EXPORT_SYMBOL(qdisc_warn_nonwc);
579
/* hrtimer callback: the throttled qdisc's wait is over — clear the
 * throttle flag and reschedule the root qdisc so dequeue runs again.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	qdisc_unthrottled(wd->qdisc);
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;	/* one-shot; rearmed on demand */
}
592
/* Prepare a watchdog for @qdisc: a CPU-pinned, monotonic, absolute-time
 * hrtimer that fires qdisc_watchdog().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
599EXPORT_SYMBOL(qdisc_watchdog_init);
600
/* (Re)arm the watchdog to fire at absolute time @expires (nanoseconds),
 * optionally marking the qdisc throttled.  Does nothing while the root
 * qdisc is deactivated, and skips rearming when the expiry time is
 * unchanged.
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (throttle)
		qdisc_throttled(wd->qdisc);

	/* Avoid a pointless hrtimer reprogram for the same deadline. */
	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
Jiri Pirko34c5d292013-02-12 00:12:04 +0000618EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700619
/* Stop a pending watchdog timer and clear the qdisc's throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
625EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Adrian Bunka94f7792008-07-22 14:20:11 -0700627static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700628{
629 unsigned int size = n * sizeof(struct hlist_head), i;
630 struct hlist_head *h;
631
632 if (size <= PAGE_SIZE)
633 h = kmalloc(size, GFP_KERNEL);
634 else
635 h = (struct hlist_head *)
636 __get_free_pages(GFP_KERNEL, get_order(size));
637
638 if (h != NULL) {
639 for (i = 0; i < n; i++)
640 INIT_HLIST_HEAD(&h[i]);
641 }
642 return h;
643}
644
645static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
646{
647 unsigned int size = n * sizeof(struct hlist_head);
648
649 if (size <= PAGE_SIZE)
650 kfree(h);
651 else
652 free_pages((unsigned long)h, get_order(size));
653}
654
/* Double the class hash of @sch when its load factor exceeds 0.75.
 * The new table is allocated outside the qdisc tree lock; only the
 * rehash itself runs under sch_tree_lock().  Allocation failure simply
 * keeps the old (still correct, just more loaded) table.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class into its bucket under the doubled mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
689EXPORT_SYMBOL(qdisc_class_hash_grow);
690
691int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
692{
693 unsigned int size = 4;
694
695 clhash->hash = qdisc_class_hash_alloc(size);
696 if (clhash->hash == NULL)
697 return -ENOMEM;
698 clhash->hashsize = size;
699 clhash->hashmask = size - 1;
700 clhash->hashelems = 0;
701 return 0;
702}
703EXPORT_SYMBOL(qdisc_class_hash_init);
704
/* Free the bucket array of a class hash (entries must be gone already). */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
709EXPORT_SYMBOL(qdisc_class_hash_destroy);
710
711void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
712 struct Qdisc_class_common *cl)
713{
714 unsigned int h;
715
716 INIT_HLIST_NODE(&cl->hnode);
717 h = qdisc_class_hash(cl->classid, clhash->hashmask);
718 hlist_add_head(&cl->hnode, &clhash->hash[h]);
719 clhash->hashelems++;
720}
721EXPORT_SYMBOL(qdisc_class_hash_insert);
722
/* Unlink a class from its hash bucket and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
729EXPORT_SYMBOL(qdisc_class_hash_remove);
730
/* Allocate a unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 * Returns 0 when every handle in the range is already in use.
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap around before colliding with TC_H_ROOT. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();	/* the lookup can be slow on big trees */
	} while	(--i > 0);

	return 0;
}
750
/* Propagate a decrease of @n packets / @len bytes from @sch up through
 * all its ancestors, fixing their qlen/backlog counters and giving
 * classful parents a qlen_notify() chance to react to the shrink.
 * Walks under RCU; stops at ingress or at a TCQ_F_NOPARENT qdisc.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no parent. */
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
WANG Cong2ccccf52016-02-25 14:55:01 -0800787EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788
/* Send an rtnetlink notification for @clid when an old or new qdisc is
 * involved, then release the old qdisc.  Notify first, destroy second.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
799
800/* Graft qdisc "new" to class "classid" of qdisc "parent" or
801 * to device "dev".
802 *
803 * When appropriate send a netlink notification using 'skb'
804 * and "n".
805 *
806 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 */
808
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Replacing the device-level (root egress or ingress) qdisc. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		/* Ingress grafts touch exactly one queue, which must exist. */
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Quiesce the device while qdiscs are swapped; it is
		 * re-activated at the bottom of this branch.
		 */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Qdiscs providing ->attach() install their own per-queue
		 * qdiscs (see the attach call below), so the generic
		 * per-queue graft loop is skipped for them.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One extra reference per additional tx queue that
			 * shares the same qdisc.
			 */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			/* For ingress, the displaced qdisc is released via
			 * notify_and_destroy() below instead.
			 */
			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of a classful parent qdisc via its
		 * class ops; parents without ->graft cannot accept children.
		 */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
883
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700884/* lockdep annotation is needed for ingress; egress gets it only for name */
885static struct lock_class_key qdisc_tx_lock;
886static struct lock_class_key qdisc_rx_lock;
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888/*
889 Allocate and initialize new qdisc.
890
891 Parameters are passed via opt.
892 */
893
static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	/* Unknown kind: try to autoload a matching "sch_<kind>" module. */
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	/* Ingress qdiscs get a fixed handle and the rx lockdep class;
	 * egress qdiscs may need a freshly allocated handle.
	 */
	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		/* Per-cpu counters for qdiscs that keep lockless stats. */
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		/* Optional size table, published under RCU. */
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick the lock the estimator will run under:
			 * the sleeping root lock for ordinary children,
			 * the qdisc's own lock for root/ingress/mq cases.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						root_lock,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	/* Undo the allocation: drop the device reference (presumably taken
	 * in qdisc_alloc() — confirm there) and free the qdisc memory.
	 * NOTE(review): an ops->init() failure also falls through to here
	 * without calling ops->destroy(); init implementations are
	 * apparently expected to clean up after themselves — verify.
	 */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1032
Patrick McHardy1e904742008-01-22 22:11:17 -08001033static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034{
Eric Dumazeta2da5702011-01-20 03:48:19 +00001035 struct qdisc_size_table *ostab, *stab = NULL;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001036 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001038 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 if (sch->ops->change == NULL)
1040 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -08001041 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 if (err)
1043 return err;
1044 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001045
1046 if (tca[TCA_STAB]) {
1047 stab = qdisc_get_stab(tca[TCA_STAB]);
1048 if (IS_ERR(stab))
1049 return PTR_ERR(stab);
1050 }
1051
Eric Dumazeta2da5702011-01-20 03:48:19 +00001052 ostab = rtnl_dereference(sch->stab);
1053 rcu_assign_pointer(sch->stab, stab);
1054 qdisc_put_stab(ostab);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001055
Patrick McHardy23bcf632009-09-09 18:11:23 -07001056 if (tca[TCA_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001057 /* NB: ignores errors from replace_estimator
1058 because change can't be undone. */
Patrick McHardy23bcf632009-09-09 18:11:23 -07001059 if (sch->flags & TCQ_F_MQROOT)
1060 goto out;
John Fastabend22e0f8b2014-09-28 11:52:56 -07001061 gen_replace_estimator(&sch->bstats,
1062 sch->cpu_bstats,
1063 &sch->rate_est,
1064 qdisc_root_sleeping_lock(sch),
1065 tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -07001066 }
1067out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 return 0;
1069}
1070
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001071struct check_loop_arg {
1072 struct qdisc_walker w;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 struct Qdisc *p;
1074 int depth;
1075};
1076
1077static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1078
1079static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1080{
1081 struct check_loop_arg arg;
1082
1083 if (q->ops->cl_ops == NULL)
1084 return 0;
1085
1086 arg.w.stop = arg.w.skip = arg.w.count = 0;
1087 arg.w.fn = check_loop_fn;
1088 arg.depth = depth;
1089 arg.p = p;
1090 q->ops->cl_ops->walk(q, &arg.w);
1091 return arg.w.stop ? -ELOOP : 0;
1092}
1093
1094static int
1095check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1096{
1097 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001098 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1100
1101 leaf = cops->leaf(q, cl);
1102 if (leaf) {
1103 if (leaf == arg->p || arg->depth > 7)
1104 return -ELOOP;
1105 return check_loop(leaf, arg->p, arg->depth + 1);
1106 }
1107 return 0;
1108}
1109
1110/*
1111 * Delete/get qdisc.
1112 */
1113
Thomas Graf661d2962013-03-21 07:45:29 +00001114static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001116 struct net *net = sock_net(skb->sk);
David S. Miller02ef22c2012-06-26 21:50:05 -07001117 struct tcmsg *tcm = nlmsg_data(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001118 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 struct net_device *dev;
Hong zhi guode179c82013-03-25 17:36:33 +00001120 u32 clid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 struct Qdisc *q = NULL;
1122 struct Qdisc *p = NULL;
1123 int err;
1124
Stéphane Graber4e8bbb82014-04-30 11:25:43 -04001125 if ((n->nlmsg_type != RTM_GETQDISC) &&
David S. Miller5f013c9b2014-05-12 13:19:14 -04001126 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001127 return -EPERM;
1128
Patrick McHardy1e904742008-01-22 22:11:17 -08001129 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1130 if (err < 0)
1131 return err;
1132
Hong zhi guode179c82013-03-25 17:36:33 +00001133 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1134 if (!dev)
1135 return -ENODEV;
1136
1137 clid = tcm->tcm_parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (clid) {
1139 if (clid != TC_H_ROOT) {
1140 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001141 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1142 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 return -ENOENT;
1144 q = qdisc_leaf(p, clid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001145 } else if (dev_ingress_queue(dev)) {
1146 q = dev_ingress_queue(dev)->qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001147 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001149 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 }
1151 if (!q)
1152 return -ENOENT;
1153
1154 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1155 return -EINVAL;
1156 } else {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001157 q = qdisc_lookup(dev, tcm->tcm_handle);
1158 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 return -ENOENT;
1160 }
1161
Patrick McHardy1e904742008-01-22 22:11:17 -08001162 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 return -EINVAL;
1164
1165 if (n->nlmsg_type == RTM_DELQDISC) {
1166 if (!clid)
1167 return -EINVAL;
1168 if (q->handle == 0)
1169 return -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001170 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1171 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 } else {
Tom Goff7316ae82010-03-19 15:40:13 +00001174 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 }
1176 return 0;
1177}
1178
1179/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001180 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 */
1182
Thomas Graf661d2962013-03-21 07:45:29 +00001183static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001185 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001187 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 struct net_device *dev;
1189 u32 clid;
1190 struct Qdisc *q, *p;
1191 int err;
1192
David S. Miller5f013c9b2014-05-12 13:19:14 -04001193 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001194 return -EPERM;
1195
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196replay:
1197 /* Reinit, just in case something touches this. */
Hong zhi guode179c82013-03-25 17:36:33 +00001198 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1199 if (err < 0)
1200 return err;
1201
David S. Miller02ef22c2012-06-26 21:50:05 -07001202 tcm = nlmsg_data(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 clid = tcm->tcm_parent;
1204 q = p = NULL;
1205
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001206 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1207 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 return -ENODEV;
1209
Patrick McHardy1e904742008-01-22 22:11:17 -08001210
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 if (clid) {
1212 if (clid != TC_H_ROOT) {
1213 if (clid != TC_H_INGRESS) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001214 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1215 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 return -ENOENT;
1217 q = qdisc_leaf(p, clid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001218 } else if (dev_ingress_queue_create(dev)) {
1219 q = dev_ingress_queue(dev)->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 }
1221 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001222 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 }
1224
1225 /* It may be default qdisc, ignore it */
1226 if (q && q->handle == 0)
1227 q = NULL;
1228
1229 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1230 if (tcm->tcm_handle) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001231 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 return -EEXIST;
1233 if (TC_H_MIN(tcm->tcm_handle))
1234 return -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001235 q = qdisc_lookup(dev, tcm->tcm_handle);
1236 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 goto create_n_graft;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001238 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001240 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 return -EINVAL;
1242 if (q == p ||
1243 (p && check_loop(q, p, 0)))
1244 return -ELOOP;
1245 atomic_inc(&q->refcnt);
1246 goto graft;
1247 } else {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001248 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 goto create_n_graft;
1250
1251 /* This magic test requires explanation.
1252 *
1253 * We know, that some child q is already
1254 * attached to this parent and have choice:
1255 * either to change it or to create/graft new one.
1256 *
1257 * 1. We are allowed to create/graft only
1258 * if CREATE and REPLACE flags are set.
1259 *
1260 * 2. If EXCL is set, requestor wanted to say,
1261 * that qdisc tcm_handle is not expected
1262 * to exist, so that we choose create/graft too.
1263 *
1264 * 3. The last case is when no flags are set.
1265 * Alas, it is sort of hole in API, we
1266 * cannot decide what to do unambiguously.
1267 * For now we select create/graft, if
1268 * user gave KIND, which does not match existing.
1269 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001270 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1271 (n->nlmsg_flags & NLM_F_REPLACE) &&
1272 ((n->nlmsg_flags & NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001273 (tca[TCA_KIND] &&
1274 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 goto create_n_graft;
1276 }
1277 }
1278 } else {
1279 if (!tcm->tcm_handle)
1280 return -EINVAL;
1281 q = qdisc_lookup(dev, tcm->tcm_handle);
1282 }
1283
1284 /* Change qdisc parameters */
1285 if (q == NULL)
1286 return -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001287 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001289 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 return -EINVAL;
1291 err = qdisc_change(q, tca);
1292 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001293 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 return err;
1295
1296create_n_graft:
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001297 if (!(n->nlmsg_flags & NLM_F_CREATE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 return -ENOENT;
Eric Dumazet24824a02010-10-02 06:11:55 +00001299 if (clid == TC_H_INGRESS) {
1300 if (dev_ingress_queue(dev))
1301 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1302 tcm->tcm_parent, tcm->tcm_parent,
1303 tca, &err);
1304 else
1305 err = -ENOENT;
1306 } else {
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001307 struct netdev_queue *dev_queue;
David S. Miller6ec1c692009-09-06 01:58:51 -07001308
1309 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001310 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1311 else if (p)
1312 dev_queue = p->dev_queue;
1313 else
1314 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller6ec1c692009-09-06 01:58:51 -07001315
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001316 q = qdisc_create(dev, dev_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001317 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001318 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001319 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 if (q == NULL) {
1321 if (err == -EAGAIN)
1322 goto replay;
1323 return err;
1324 }
1325
1326graft:
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001327 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1328 if (err) {
1329 if (q)
1330 qdisc_destroy(q);
1331 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001333
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 return 0;
1335}
1336
/* Append one qdisc message (tcmsg header + TCA_* attributes + stats) to
 * @skb.  Returns skb->len on success; on any failure the partially built
 * message is trimmed off and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d,
					 TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Qdiscs with lockless per-cpu stats hand those to the copy
	 * helpers; others fall back to the shared counters.
	 */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1400
Eric Dumazet53b0f082010-05-22 20:37:44 +00001401static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1402{
1403 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1404}
1405
Tom Goff7316ae82010-03-19 15:40:13 +00001406static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1407 struct nlmsghdr *n, u32 clid,
1408 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409{
1410 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001411 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412
1413 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1414 if (!skb)
1415 return -ENOBUFS;
1416
Eric Dumazet53b0f082010-05-22 20:37:44 +00001417 if (old && !tc_qdisc_dump_ignore(old)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001418 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001419 0, RTM_DELQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 goto err_out;
1421 }
Eric Dumazet53b0f082010-05-22 20:37:44 +00001422 if (new && !tc_qdisc_dump_ignore(new)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001423 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001424 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 goto err_out;
1426 }
1427
1428 if (skb->len)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001429 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001430 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431
1432err_out:
1433 kfree_skb(skb);
1434 return -EINVAL;
1435}
1436
David S. Miller30723672008-07-18 22:50:15 -07001437static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1438 struct netlink_callback *cb,
1439 int *q_idx_p, int s_q_idx)
1440{
1441 int ret = 0, q_idx = *q_idx_p;
1442 struct Qdisc *q;
1443
1444 if (!root)
1445 return 0;
1446
1447 q = root;
1448 if (q_idx < s_q_idx) {
1449 q_idx++;
1450 } else {
1451 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001452 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001453 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1454 goto done;
1455 q_idx++;
1456 }
1457 list_for_each_entry(q, &root->list, list) {
1458 if (q_idx < s_q_idx) {
1459 q_idx++;
1460 continue;
1461 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001462 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001463 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001464 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1465 goto done;
1466 q_idx++;
1467 }
1468
1469out:
1470 *q_idx_p = q_idx;
1471 return ret;
1472done:
1473 ret = -1;
1474 goto out;
1475}
1476
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1478{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001479 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 int idx, q_idx;
1481 int s_idx, s_q_idx;
1482 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483
1484 s_idx = cb->args[0];
1485 s_q_idx = q_idx = cb->args[1];
stephen hemmingerf1e90162009-11-10 07:54:49 +00001486
Pavel Emelianov7562f872007-05-03 15:13:45 -07001487 idx = 0;
Eric Dumazet15dc36e2014-03-10 17:11:42 -07001488 ASSERT_RTNL();
1489 for_each_netdev(net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001490 struct netdev_queue *dev_queue;
1491
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001493 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 if (idx > s_idx)
1495 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001497
Patrick McHardyaf356af2009-09-04 06:41:18 +00001498 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001499 goto done;
1500
Eric Dumazet24824a02010-10-02 06:11:55 +00001501 dev_queue = dev_ingress_queue(dev);
1502 if (dev_queue &&
1503 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1504 &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001505 goto done;
1506
Pavel Emelianov7562f872007-05-03 15:13:45 -07001507cont:
1508 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 }
1510
1511done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 cb->args[0] = idx;
1513 cb->args[1] = q_idx;
1514
1515 return skb->len;
1516}
1517
1518
1519
1520/************************************************
1521 * Traffic classes manipulation. *
1522 ************************************************/
1523
1524
1525
/* tc_ctl_tclass - handle RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS.
 *
 * Resolves the qdisc addressed by the tcmsg handle/parent pair, looks up
 * (or, for NEWTCLASS with NLM_F_CREATE, prepares to create) the class and
 * dispatches on the netlink message type.  Returns 0 or a negative errno.
 *
 * NOTE(review): the local named "portid" initially holds tcm_parent (a TC
 * handle), not a netlink port id; it is rewritten with TC_H_MAKE() below
 * before being passed to cops->change().
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only GET requests are allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	/* cops->get() takes a reference; released via cops->put() at "out". */
	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class not found: only NEWTCLASS with NLM_F_CREATE may
		 * proceed (to create it); everything else fails here.
		 */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change the class via the qdisc's class ops. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
1657
1658
/* Fill one class-description message (tcmsg + attributes + statistics)
 * for class @cl of qdisc @q into @skb.
 * Returns skb->len on success; on any failure the partially built
 * message is trimmed off and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Class-specific attributes, filled in by the qdisc's class ops. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d,
					 TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);	/* discard the partially built message */
	return -1;
}
1705
Tom Goff7316ae82010-03-19 15:40:13 +00001706static int tclass_notify(struct net *net, struct sk_buff *oskb,
1707 struct nlmsghdr *n, struct Qdisc *q,
1708 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709{
1710 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001711 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
1713 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1714 if (!skb)
1715 return -ENOBUFS;
1716
Eric W. Biederman15e47302012-09-07 20:12:54 +00001717 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 kfree_skb(skb);
1719 return -EINVAL;
1720 }
1721
Eric W. Biederman15e47302012-09-07 20:12:54 +00001722 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001723 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724}
1725
/* State threaded through the class walker while dumping classes. */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* embedded walker handed to ->walk();
					 * callbacks recover the container */
	struct sk_buff *skb;		/* dump message under construction */
	struct netlink_callback *cb;	/* netlink dump context (seq, args) */
};
1731
1732static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1733{
1734 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1735
Eric W. Biederman15e47302012-09-07 20:12:54 +00001736 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1738}
1739
/* Dump all classes of one qdisc into an in-progress RTM_GETTCLASS reply.
 *
 * *t_p counts qdiscs visited so far; qdiscs before the resume index s_t
 * were fully dumped in a previous round and are skipped.  cb->args[1]
 * carries the per-qdisc class resume point.  Returns 0 to continue with
 * the next qdisc, -1 when the skb filled up and the dump must resume.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs not worth dumping, classless qdiscs, and — when the
	 * request names a parent — any qdisc other than the one selected.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Strictly past the resume point: clear stale walk state args[1..]. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already sent last round */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* resume point for the next round */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1768
1769static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1770 struct tcmsg *tcm, struct netlink_callback *cb,
1771 int *t_p, int s_t)
1772{
1773 struct Qdisc *q;
1774
1775 if (!root)
1776 return 0;
1777
1778 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1779 return -1;
1780
1781 list_for_each_entry(q, &root->list, list) {
1782 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1783 return -1;
1784 }
1785
1786 return 0;
1787}
1788
/* Netlink dump handler for RTM_GETTCLASS: dump the classes of every
 * qdisc (root hierarchy plus the ingress qdisc, if any) on the device
 * named by tcm_ifindex.  cb->args[0] is the qdisc resume index.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);	/* takes a dev ref */
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	/* The ingress qdisc sits outside the dev->qdisc hierarchy. */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);	/* pairs with dev_get_by_index() above */
	return skb->len;
}
1821
/* Main classifier routine: scans the classifier chain attached to this
 * qdisc, (optionally) tests for protocol and asks specific classifiers.
 * Returns the classifier verdict (>= 0), TC_ACT_UNSPEC to signal
 * "continue lookup", or TC_ACT_SHOT on a reclassification loop.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	/* Remember the head of the chain so a TC_ACT_RECLASSIFY verdict can
	 * restart matching from the start; limit bounds the restarts.
	 */
	const struct tcf_proto *old_tp = tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Skip filters bound to a different protocol. */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= MAX_REC_LOOP)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	/* Restart from the head of the chain; re-read the protocol since
	 * it is taken from the skb — NOTE(review): presumably an action
	 * may have rewritten it.
	 */
	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868
Cong Wang1e052be2015-03-06 11:47:59 -08001869bool tcf_destroy(struct tcf_proto *tp, bool force)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001870{
Cong Wang1e052be2015-03-06 11:47:59 -08001871 if (tp->ops->destroy(tp, force)) {
1872 module_put(tp->ops->owner);
1873 kfree_rcu(tp, rcu);
1874 return true;
1875 }
1876
1877 return false;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001878}
1879
John Fastabend25d8c0d2014-09-12 20:05:27 -07001880void tcf_destroy_chain(struct tcf_proto __rcu **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001881{
1882 struct tcf_proto *tp;
1883
John Fastabend25d8c0d2014-09-12 20:05:27 -07001884 while ((tp = rtnl_dereference(*fl)) != NULL) {
1885 RCU_INIT_POINTER(*fl, tp->next);
Cong Wang1e052be2015-03-06 11:47:59 -08001886 tcf_destroy(tp, true);
Patrick McHardya48b5a62007-03-23 11:29:43 -07001887 }
1888}
1889EXPORT_SYMBOL(tcf_destroy_chain);
1890
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891#ifdef CONFIG_PROC_FS
1892static int psched_show(struct seq_file *seq, void *v)
1893{
1894 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07001895 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001896 1000000,
Thomas Gleixner1e317682015-04-14 21:08:28 +00001897 (u32)NSEC_PER_SEC / hrtimer_resolution);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
1899 return 0;
1900}
1901
/* open() hook for /proc/net/psched: the file is a single record. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1906
/* File operations backing /proc/net/psched. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001914
1915static int __net_init psched_net_init(struct net *net)
1916{
1917 struct proc_dir_entry *e;
1918
Gao fengd4beaa62013-02-18 01:34:54 +00001919 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001920 if (e == NULL)
1921 return -ENOMEM;
1922
1923 return 0;
1924}
1925
/* Per-namespace teardown: remove this namespace's /proc/net/psched. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
/* Without CONFIG_PROC_FS there is no /proc/net/psched to manage;
 * provide no-op pernet hooks so pktsched_init() is unchanged.
 */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif
1940
/* Create/destroy /proc/net/psched as network namespaces come and go. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1945
/* Subsystem init: register the pernet /proc entry, the built-in qdiscs
 * and the rtnetlink handlers for qdisc and traffic-class messages.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
			"cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs.  NOTE(review): register_qdisc() return values
	 * are intentionally ignored here, as in upstream.
	 */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* Netlink message handlers: qdisc control/dump, class control/dump. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);