blob: 12ebde84552337ec7248e56fe1a64edc0227d7ec [file] [log] [blame]
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

	1. queueing disciplines manager frontend.
	2. traffic classes manager frontend.

   Generally, queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   device is ready to send something) in order and at times
   determined by algorithm hidden in it.

   qdisc's are divided to two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets to "traffic classes",
     using "packet classifiers" (look at cls_api.c)

   In turn, classes may have child qdiscs (as rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   information supplied by user in the form of handles
   to more intelligible for kernel form, to make some sanity
   checks and part of work, which is common to all qdiscs
   and to provide rtnetlink notifications.

   All real intelligent work is done inside qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but it does not mean that queue is empty, it just means that
   discipline does not want to send anything this time.
   Queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   real packet queue, but however q->q.qlen must be valid.

   ---enqueue

   enqueue returns 0, if packet was enqueued successfully.
   If packet (this one or another one) was dropped, it returns
   not zero error code.
   NET_XMIT_DROP	- this packet dropped
     Expected action: do not backoff, but wait until queue will clear.
   NET_XMIT_CN		- probably this packet enqueued, but another one dropped.
     Expected action: backoff or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns qdisc to initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes newly created qdisc.

   ---destroy

   destroys resources allocated by init and during lifetime of qdisc.

   ---change

   changes qdisc parameters.
 */
122
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines
 * (singly linked via Qdisc_ops::next, guarded by qdisc_mod_lock).
 */
static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
138int register_qdisc(struct Qdisc_ops *qops)
139{
140 struct Qdisc_ops *q, **qp;
141 int rc = -EEXIST;
142
143 write_lock(&qdisc_mod_lock);
144 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
145 if (!strcmp(qops->id, q->id))
146 goto out;
147
148 if (qops->enqueue == NULL)
149 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700150 if (qops->peek == NULL) {
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000151 if (qops->dequeue == NULL)
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700152 qops->peek = noop_qdisc_ops.peek;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000153 else
154 goto out_einval;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700155 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 if (qops->dequeue == NULL)
157 qops->dequeue = noop_qdisc_ops.dequeue;
158
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000159 if (qops->cl_ops) {
160 const struct Qdisc_class_ops *cops = qops->cl_ops;
161
Jarek Poplawski3e9e5a52010-08-10 22:31:20 +0000162 if (!(cops->get && cops->put && cops->walk && cops->leaf))
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000163 goto out_einval;
164
165 if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
166 goto out_einval;
167 }
168
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 qops->next = NULL;
170 *qp = qops;
171 rc = 0;
172out:
173 write_unlock(&qdisc_mod_lock);
174 return rc;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000175
176out_einval:
177 rc = -EINVAL;
178 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800180EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182int unregister_qdisc(struct Qdisc_ops *qops)
183{
184 struct Qdisc_ops *q, **qp;
185 int err = -ENOENT;
186
187 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000188 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 if (q == qops)
190 break;
191 if (q) {
192 *qp = q->next;
193 q->next = NULL;
194 err = 0;
195 }
196 write_unlock(&qdisc_mod_lock);
197 return err;
198}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800199EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700201/* Get default qdisc if not otherwise specified */
202void qdisc_get_default(char *name, size_t len)
203{
204 read_lock(&qdisc_mod_lock);
205 strlcpy(name, default_qdisc_ops->id, len);
206 read_unlock(&qdisc_mod_lock);
207}
208
209static struct Qdisc_ops *qdisc_lookup_default(const char *name)
210{
211 struct Qdisc_ops *q = NULL;
212
213 for (q = qdisc_base; q; q = q->next) {
214 if (!strcmp(name, q->id)) {
215 if (!try_module_get(q->owner))
216 q = NULL;
217 break;
218 }
219 }
220
221 return q;
222}
223
224/* Set new default qdisc to use */
225int qdisc_set_default(const char *name)
226{
227 const struct Qdisc_ops *ops;
228
229 if (!capable(CAP_NET_ADMIN))
230 return -EPERM;
231
232 write_lock(&qdisc_mod_lock);
233 ops = qdisc_lookup_default(name);
234 if (!ops) {
235 /* Not found, drop lock and try to load module */
236 write_unlock(&qdisc_mod_lock);
237 request_module("sch_%s", name);
238 write_lock(&qdisc_mod_lock);
239
240 ops = qdisc_lookup_default(name);
241 }
242
243 if (ops) {
244 /* Set new default */
245 module_put(default_qdisc_ops->owner);
246 default_qdisc_ops = ops;
247 }
248 write_unlock(&qdisc_mod_lock);
249
250 return ops ? 0 : -ENOENT;
251}
252
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253/* We know handle. Find qdisc among all qdisc's attached to device
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800254 * (root qdisc, all its children, children of children etc.)
255 * Note: caller either uses rtnl or rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 */
257
Hannes Eder6113b742008-11-28 03:06:46 -0800258static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700259{
260 struct Qdisc *q;
261
262 if (!(root->flags & TCQ_F_BUILTIN) &&
263 root->handle == handle)
264 return root;
265
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800266 list_for_each_entry_rcu(q, &root->list, list) {
David S. Miller8123b422008-08-08 23:23:39 -0700267 if (q->handle == handle)
268 return q;
269 }
270 return NULL;
271}
272
Eric Dumazet95dc1922013-12-05 11:12:02 -0800273void qdisc_list_add(struct Qdisc *q)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700274{
Eric Dumazet37314362014-03-08 08:01:19 -0800275 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
276 struct Qdisc *root = qdisc_dev(q)->qdisc;
Eric Dumazete57a7842013-12-12 15:41:56 -0800277
Eric Dumazet37314362014-03-08 08:01:19 -0800278 WARN_ON_ONCE(root == &noop_qdisc);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800279 ASSERT_RTNL();
280 list_add_tail_rcu(&q->list, &root->list);
Eric Dumazet37314362014-03-08 08:01:19 -0800281 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700282}
Eric Dumazet95dc1922013-12-05 11:12:02 -0800283EXPORT_SYMBOL(qdisc_list_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700284
285void qdisc_list_del(struct Qdisc *q)
286{
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800287 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
288 ASSERT_RTNL();
289 list_del_rcu(&q->list);
290 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700291}
292EXPORT_SYMBOL(qdisc_list_del);
293
David S. Milleread81cc2008-07-17 00:50:32 -0700294struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800295{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700296 struct Qdisc *q;
297
Patrick McHardyaf356af2009-09-04 06:41:18 +0000298 q = qdisc_match_from_root(dev->qdisc, handle);
299 if (q)
300 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700301
Eric Dumazet24824a02010-10-02 06:11:55 +0000302 if (dev_ingress_queue(dev))
303 q = qdisc_match_from_root(
304 dev_ingress_queue(dev)->qdisc_sleeping,
305 handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800306out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700307 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800308}
309
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
311{
312 unsigned long cl;
313 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800314 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
316 if (cops == NULL)
317 return NULL;
318 cl = cops->get(p, classid);
319
320 if (cl == 0)
321 return NULL;
322 leaf = cops->leaf(p, cl);
323 cops->put(p, cl);
324 return leaf;
325}
326
327/* Find queueing discipline by name */
328
Patrick McHardy1e904742008-01-22 22:11:17 -0800329static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330{
331 struct Qdisc_ops *q = NULL;
332
333 if (kind) {
334 read_lock(&qdisc_mod_lock);
335 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800336 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 if (!try_module_get(q->owner))
338 q = NULL;
339 break;
340 }
341 }
342 read_unlock(&qdisc_mod_lock);
343 }
344 return q;
345}
346
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200347/* The linklayer setting were not transferred from iproute2, in older
348 * versions, and the rate tables lookup systems have been dropped in
349 * the kernel. To keep backward compatible with older iproute2 tc
350 * utils, we detect the linklayer setting by detecting if the rate
351 * table were modified.
352 *
353 * For linklayer ATM table entries, the rate table will be aligned to
354 * 48 bytes, thus some table entries will contain the same value. The
355 * mpu (min packet unit) is also encoded into the old rate table, thus
356 * starting from the mpu, we find low and high table entries for
357 * mapping this cell. If these entries contain the same value, when
358 * the rate tables have been modified for linklayer ATM.
359 *
360 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
361 * and then roundup to the next cell, calc the table entry one below,
362 * and compare.
363 */
364static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
365{
366 int low = roundup(r->mpu, 48);
367 int high = roundup(low+1, 48);
368 int cell_low = low >> r->cell_log;
369 int cell_high = (high >> r->cell_log) - 1;
370
371 /* rtab is too inaccurate at rates > 100Mbit/s */
372 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
373 pr_debug("TC linklayer: Giving up ATM detection\n");
374 return TC_LINKLAYER_ETHERNET;
375 }
376
377 if ((cell_high > cell_low) && (cell_high < 256)
378 && (rtab[cell_low] == rtab[cell_high])) {
379 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
380 cell_low, cell_high, rtab[cell_high]);
381 return TC_LINKLAYER_ATM;
382 }
383 return TC_LINKLAYER_ETHERNET;
384}
385
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386static struct qdisc_rate_table *qdisc_rtab_list;
387
Patrick McHardy1e904742008-01-22 22:11:17 -0800388struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
390 struct qdisc_rate_table *rtab;
391
Eric Dumazet40edeff2013-06-02 11:15:55 +0000392 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
393 nla_len(tab) != TC_RTAB_SIZE)
394 return NULL;
395
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
Eric Dumazet40edeff2013-06-02 11:15:55 +0000397 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
398 !memcmp(&rtab->data, nla_data(tab), 1024)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 rtab->refcnt++;
400 return rtab;
401 }
402 }
403
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
405 if (rtab) {
406 rtab->rate = *r;
407 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800408 memcpy(rtab->data, nla_data(tab), 1024);
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200409 if (r->linklayer == TC_LINKLAYER_UNAWARE)
410 r->linklayer = __detect_linklayer(r, rtab->data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 rtab->next = qdisc_rtab_list;
412 qdisc_rtab_list = rtab;
413 }
414 return rtab;
415}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800416EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
418void qdisc_put_rtab(struct qdisc_rate_table *tab)
419{
420 struct qdisc_rate_table *rtab, **rtabp;
421
422 if (!tab || --tab->refcnt)
423 return;
424
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000425 for (rtabp = &qdisc_rtab_list;
426 (rtab = *rtabp) != NULL;
427 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 if (rtab == tab) {
429 *rtabp = rtab->next;
430 kfree(rtab);
431 return;
432 }
433 }
434}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800435EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700437static LIST_HEAD(qdisc_stab_list);
438static DEFINE_SPINLOCK(qdisc_stab_lock);
439
440static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
441 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
442 [TCA_STAB_DATA] = { .type = NLA_BINARY },
443};
444
445static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
446{
447 struct nlattr *tb[TCA_STAB_MAX + 1];
448 struct qdisc_size_table *stab;
449 struct tc_sizespec *s;
450 unsigned int tsize = 0;
451 u16 *tab = NULL;
452 int err;
453
454 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
455 if (err < 0)
456 return ERR_PTR(err);
457 if (!tb[TCA_STAB_BASE])
458 return ERR_PTR(-EINVAL);
459
460 s = nla_data(tb[TCA_STAB_BASE]);
461
462 if (s->tsize > 0) {
463 if (!tb[TCA_STAB_DATA])
464 return ERR_PTR(-EINVAL);
465 tab = nla_data(tb[TCA_STAB_DATA]);
466 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
467 }
468
Dan Carpenter00093fa2010-08-14 11:09:49 +0000469 if (tsize != s->tsize || (!tab && tsize > 0))
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700470 return ERR_PTR(-EINVAL);
471
David S. Millerf3b96052008-08-18 22:33:05 -0700472 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700473
474 list_for_each_entry(stab, &qdisc_stab_list, list) {
475 if (memcmp(&stab->szopts, s, sizeof(*s)))
476 continue;
477 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
478 continue;
479 stab->refcnt++;
David S. Millerf3b96052008-08-18 22:33:05 -0700480 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700481 return stab;
482 }
483
David S. Millerf3b96052008-08-18 22:33:05 -0700484 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700485
486 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
487 if (!stab)
488 return ERR_PTR(-ENOMEM);
489
490 stab->refcnt = 1;
491 stab->szopts = *s;
492 if (tsize > 0)
493 memcpy(stab->data, tab, tsize * sizeof(u16));
494
David S. Millerf3b96052008-08-18 22:33:05 -0700495 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700496 list_add_tail(&stab->list, &qdisc_stab_list);
David S. Millerf3b96052008-08-18 22:33:05 -0700497 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700498
499 return stab;
500}
501
Eric Dumazeta2da5702011-01-20 03:48:19 +0000502static void stab_kfree_rcu(struct rcu_head *head)
503{
504 kfree(container_of(head, struct qdisc_size_table, rcu));
505}
506
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700507void qdisc_put_stab(struct qdisc_size_table *tab)
508{
509 if (!tab)
510 return;
511
David S. Millerf3b96052008-08-18 22:33:05 -0700512 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700513
514 if (--tab->refcnt == 0) {
515 list_del(&tab->list);
Eric Dumazeta2da5702011-01-20 03:48:19 +0000516 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700517 }
518
David S. Millerf3b96052008-08-18 22:33:05 -0700519 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700520}
521EXPORT_SYMBOL(qdisc_put_stab);
522
523static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
524{
525 struct nlattr *nest;
526
527 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800528 if (nest == NULL)
529 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400530 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
531 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700532 nla_nest_end(skb, nest);
533
534 return skb->len;
535
536nla_put_failure:
537 return -1;
538}
539
Eric Dumazeta2da5702011-01-20 03:48:19 +0000540void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700541{
542 int pkt_len, slot;
543
544 pkt_len = skb->len + stab->szopts.overhead;
545 if (unlikely(!stab->szopts.tsize))
546 goto out;
547
548 slot = pkt_len + stab->szopts.cell_align;
549 if (unlikely(slot < 0))
550 slot = 0;
551
552 slot >>= stab->szopts.cell_log;
553 if (likely(slot < stab->szopts.tsize))
554 pkt_len = stab->data[slot];
555 else
556 pkt_len = stab->data[stab->szopts.tsize - 1] *
557 (slot / stab->szopts.tsize) +
558 stab->data[slot % stab->szopts.tsize];
559
560 pkt_len <<= stab->szopts.size_log;
561out:
562 if (unlikely(pkt_len < 1))
563 pkt_len = 1;
564 qdisc_skb_cb(skb)->pkt_len = pkt_len;
565}
Eric Dumazeta2da5702011-01-20 03:48:19 +0000566EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700567
Florian Westphal6e765a02014-06-11 20:35:18 +0200568void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800569{
570 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000571 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
572 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800573 qdisc->flags |= TCQ_F_WARN_NONWC;
574 }
575}
576EXPORT_SYMBOL(qdisc_warn_nonwc);
577
Patrick McHardy41794772007-03-16 01:19:15 -0700578static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
579{
580 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700581 timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700582
John Fastabend1e203c12014-10-02 22:43:09 -0700583 rcu_read_lock();
David S. Miller8608db02008-08-18 20:51:18 -0700584 __netif_schedule(qdisc_root(wd->qdisc));
John Fastabend1e203c12014-10-02 22:43:09 -0700585 rcu_read_unlock();
Stephen Hemminger19365022007-03-22 12:18:35 -0700586
Patrick McHardy41794772007-03-16 01:19:15 -0700587 return HRTIMER_NORESTART;
588}
589
590void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
591{
Eric Dumazet4a8e3202014-09-20 18:01:30 -0700592 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
David S. Miller2fbd3da2009-09-01 17:59:25 -0700593 wd->timer.function = qdisc_watchdog;
Patrick McHardy41794772007-03-16 01:19:15 -0700594 wd->qdisc = qdisc;
595}
596EXPORT_SYMBOL(qdisc_watchdog_init);
597
Eric Dumazet45f50be2016-06-10 16:41:39 -0700598void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
Patrick McHardy41794772007-03-16 01:19:15 -0700599{
Jarek Poplawski2540e052008-08-21 05:11:14 -0700600 if (test_bit(__QDISC_STATE_DEACTIVATED,
601 &qdisc_root_sleeping(wd->qdisc)->state))
602 return;
603
Eric Dumazeta9efad82016-05-23 14:24:56 -0700604 if (wd->last_expires == expires)
605 return;
606
607 wd->last_expires = expires;
Eric Dumazet46baac32012-10-20 00:40:51 +0000608 hrtimer_start(&wd->timer,
Jiri Pirko34c5d292013-02-12 00:12:04 +0000609 ns_to_ktime(expires),
Eric Dumazet4a8e3202014-09-20 18:01:30 -0700610 HRTIMER_MODE_ABS_PINNED);
Patrick McHardy41794772007-03-16 01:19:15 -0700611}
Jiri Pirko34c5d292013-02-12 00:12:04 +0000612EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700613
614void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
615{
David S. Miller2fbd3da2009-09-01 17:59:25 -0700616 hrtimer_cancel(&wd->timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700617}
618EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619
Adrian Bunka94f7792008-07-22 14:20:11 -0700620static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700621{
622 unsigned int size = n * sizeof(struct hlist_head), i;
623 struct hlist_head *h;
624
625 if (size <= PAGE_SIZE)
626 h = kmalloc(size, GFP_KERNEL);
627 else
628 h = (struct hlist_head *)
629 __get_free_pages(GFP_KERNEL, get_order(size));
630
631 if (h != NULL) {
632 for (i = 0; i < n; i++)
633 INIT_HLIST_HEAD(&h[i]);
634 }
635 return h;
636}
637
638static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
639{
640 unsigned int size = n * sizeof(struct hlist_head);
641
642 if (size <= PAGE_SIZE)
643 kfree(h);
644 else
645 free_pages((unsigned long)h, get_order(size));
646}
647
648void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
649{
650 struct Qdisc_class_common *cl;
Sasha Levinb67bfe02013-02-27 17:06:00 -0800651 struct hlist_node *next;
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700652 struct hlist_head *nhash, *ohash;
653 unsigned int nsize, nmask, osize;
654 unsigned int i, h;
655
656 /* Rehash when load factor exceeds 0.75 */
657 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
658 return;
659 nsize = clhash->hashsize * 2;
660 nmask = nsize - 1;
661 nhash = qdisc_class_hash_alloc(nsize);
662 if (nhash == NULL)
663 return;
664
665 ohash = clhash->hash;
666 osize = clhash->hashsize;
667
668 sch_tree_lock(sch);
669 for (i = 0; i < osize; i++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800670 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700671 h = qdisc_class_hash(cl->classid, nmask);
672 hlist_add_head(&cl->hnode, &nhash[h]);
673 }
674 }
675 clhash->hash = nhash;
676 clhash->hashsize = nsize;
677 clhash->hashmask = nmask;
678 sch_tree_unlock(sch);
679
680 qdisc_class_hash_free(ohash, osize);
681}
682EXPORT_SYMBOL(qdisc_class_hash_grow);
683
684int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
685{
686 unsigned int size = 4;
687
688 clhash->hash = qdisc_class_hash_alloc(size);
689 if (clhash->hash == NULL)
690 return -ENOMEM;
691 clhash->hashsize = size;
692 clhash->hashmask = size - 1;
693 clhash->hashelems = 0;
694 return 0;
695}
696EXPORT_SYMBOL(qdisc_class_hash_init);
697
698void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
699{
700 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
701}
702EXPORT_SYMBOL(qdisc_class_hash_destroy);
703
704void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
705 struct Qdisc_class_common *cl)
706{
707 unsigned int h;
708
709 INIT_HLIST_NODE(&cl->hnode);
710 h = qdisc_class_hash(cl->classid, clhash->hashmask);
711 hlist_add_head(&cl->hnode, &clhash->hash[h]);
712 clhash->hashelems++;
713}
714EXPORT_SYMBOL(qdisc_class_hash_insert);
715
716void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
717 struct Qdisc_class_common *cl)
718{
719 hlist_del(&cl->hnode);
720 clhash->hashelems--;
721}
722EXPORT_SYMBOL(qdisc_class_hash_remove);
723
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000724/* Allocate an unique handle from space managed by kernel
725 * Possible range is [8000-FFFF]:0000 (0x8000 values)
726 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727static u32 qdisc_alloc_handle(struct net_device *dev)
728{
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000729 int i = 0x8000;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
731
732 do {
733 autohandle += TC_H_MAKE(0x10000U, 0);
734 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
735 autohandle = TC_H_MAKE(0x80000000U, 0);
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000736 if (!qdisc_lookup(dev, autohandle))
737 return autohandle;
738 cond_resched();
739 } while (--i > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000741 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742}
743
WANG Cong2ccccf52016-02-25 14:55:01 -0800744void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
745 unsigned int len)
Patrick McHardy43effa12006-11-29 17:35:48 -0800746{
Eric Dumazet20fea082007-11-14 01:44:41 -0800747 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800748 unsigned long cl;
749 u32 parentid;
Eric Dumazet2c8c8e62013-10-07 08:32:32 -0700750 int drops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800751
WANG Cong2ccccf52016-02-25 14:55:01 -0800752 if (n == 0 && len == 0)
Patrick McHardy43effa12006-11-29 17:35:48 -0800753 return;
Eric Dumazet2c8c8e62013-10-07 08:32:32 -0700754 drops = max_t(int, n, 0);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800755 rcu_read_lock();
Patrick McHardy43effa12006-11-29 17:35:48 -0800756 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700757 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800758 break;
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700759
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800760 if (sch->flags & TCQ_F_NOPARENT)
761 break;
762 /* TODO: perform the search on a per txq basis */
David S. Miller5ce2d482008-07-08 17:06:30 -0700763 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700764 if (sch == NULL) {
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800765 WARN_ON_ONCE(parentid != TC_H_ROOT);
766 break;
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700767 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800768 cops = sch->ops->cl_ops;
769 if (cops->qlen_notify) {
770 cl = cops->get(sch, parentid);
771 cops->qlen_notify(sch, cl);
772 cops->put(sch, cl);
773 }
774 sch->q.qlen -= n;
WANG Cong2ccccf52016-02-25 14:55:01 -0800775 sch->qstats.backlog -= len;
John Fastabend25331d62014-09-28 11:53:29 -0700776 __qdisc_qstats_drop(sch, drops);
Patrick McHardy43effa12006-11-29 17:35:48 -0800777 }
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800778 rcu_read_unlock();
Patrick McHardy43effa12006-11-29 17:35:48 -0800779}
WANG Cong2ccccf52016-02-25 14:55:01 -0800780EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781
Tom Goff7316ae82010-03-19 15:40:13 +0000782static void notify_and_destroy(struct net *net, struct sk_buff *skb,
783 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700784 struct Qdisc *old, struct Qdisc *new)
785{
786 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000787 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788
David S. Miller4d8863a2008-08-18 21:03:15 -0700789 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700790 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700791}
792
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * Caller holds RTNL. Returns 0 on success or a negative errno.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Grafting at the device root (or ingress). */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress has exactly one queue and needs the
			 * ingress netdev_queue to exist already.
			 */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Quiesce the device while we swap root qdiscs. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Qdiscs with ->attach (e.g. mq) distribute themselves
		 * over the tx queues; skip the per-queue graft loop.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			/* Egress olds die here; the ingress old is kept
			 * so notify_and_destroy() below can report it.
			 */
			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			/* NULL new means "revert to the builtin noop". */
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Grafting into a class of an existing qdisc: delegate
		 * to the parent's class ops.
		 */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
876
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700877/* lockdep annotation is needed for ingress; egress gets it only for name */
878static struct lock_class_key qdisc_tx_lock;
879static struct lock_class_key qdisc_rx_lock;
880
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Called with RTNL held. On failure returns NULL and stores a negative
   errno in *errp; -EAGAIN asks the caller to replay the request after a
   module load dropped and re-took RTNL.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		/* Separate lockdep class for ingress qdisc locks. */
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			/* User left the handle to the kernel's pool. */
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		/* Per-cpu stats are allocated after ->init so the qdisc
		 * had a chance to set TCQ_F_CPUSTATS.
		 */
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			/* Rate estimation makes no sense on mq-style
			 * container roots.
			 */
			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick the seqcount that actually protects this
			 * qdisc's byte/packet counters.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1026
/* Apply a change request (RTM_NEWQDISC without create/replace) to an
 * existing qdisc: options, size table and rate estimator.
 * Called with RTNL held; returns 0 or a negative errno from the
 * qdisc's ->change() / stab parsing.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		/* Some qdiscs cannot be reconfigured in place. */
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new size table (or NULL) before dropping the old
	 * one; readers access sch->stab under RCU.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1065
/* Walker state for loop detection during grafting; the embedded
 * qdisc_walker must stay the first member so check_loop_fn() can cast
 * the qdisc_walker pointer back to this struct.
 */
struct check_loop_arg {
	struct qdisc_walker w;		/* generic walk callback/state */
	struct Qdisc *p;		/* qdisc we must not reach */
	int depth;			/* current recursion depth */
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1073
1074static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1075{
1076 struct check_loop_arg arg;
1077
1078 if (q->ops->cl_ops == NULL)
1079 return 0;
1080
1081 arg.w.stop = arg.w.skip = arg.w.count = 0;
1082 arg.w.fn = check_loop_fn;
1083 arg.depth = depth;
1084 arg.p = p;
1085 q->ops->cl_ops->walk(q, &arg.w);
1086 return arg.w.stop ? -ELOOP : 0;
1087}
1088
1089static int
1090check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1091{
1092 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001093 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1095
1096 leaf = cops->leaf(q, cl);
1097 if (leaf) {
1098 if (leaf == arg->p || arg->depth > 7)
1099 return -ELOOP;
1100 return check_loop(leaf, arg->p, arg->depth + 1);
1101 }
1102 return 0;
1103}
1104
1105/*
1106 * Delete/get qdisc.
1107 */
1108
/* RTM_DELQDISC / RTM_GETQDISC handler: locate the qdisc named by the
 * tcmsg (via parent classid or handle) and either graft NULL over it
 * (delete) or send it back to the requester (get).
 * Called with RTNL held; returns 0 or a negative errno.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN in the ns. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Resolve the qdisc through its parent reference. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must agree. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent given: look up directly by handle. */
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* Builtin/default qdiscs (handle 0) cannot be deleted. */
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL removes q and notifies. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1173
1174/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001175 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 */
1177
/* RTM_NEWQDISC handler: create, replace or change a qdisc according to
 * the netlink flags (NLM_F_CREATE / NLM_F_REPLACE / NLM_F_EXCL).
 * Called with RTNL held; may replay itself once after qdisc_create()
 * returns -EAGAIN (module autoload dropped RTNL). Returns 0 or a
 * negative errno.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		/* Resolve the existing child qdisc (if any) under the
		 * requested parent.
		 */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Moving an existing qdisc: must not
				 * create a cycle in the hierarchy.
				 */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		/* Ingress qdiscs use the parent id as their handle. */
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue, else fall
		 * back to the parent's queue or tx queue 0.
		 */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		/* -EAGAIN: module was loaded, redo the whole request. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1331
/* Append one netlink message describing qdisc @q (kind, options, size
 * table and statistics) to @skb.
 * Returns skb->len on success, -1 on failure; on failure everything
 * written by this call is trimmed off again so @skb stays consistent.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	/* Dumps can be long; give the scheduler a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Prefer per-cpu counters when the qdisc keeps them. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1395
Eric Dumazet53b0f082010-05-22 20:37:44 +00001396static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1397{
1398 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1399}
1400
Tom Goff7316ae82010-03-19 15:40:13 +00001401static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1402 struct nlmsghdr *n, u32 clid,
1403 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404{
1405 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001406 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407
1408 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1409 if (!skb)
1410 return -ENOBUFS;
1411
Eric Dumazet53b0f082010-05-22 20:37:44 +00001412 if (old && !tc_qdisc_dump_ignore(old)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001413 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001414 0, RTM_DELQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 goto err_out;
1416 }
Eric Dumazet53b0f082010-05-22 20:37:44 +00001417 if (new && !tc_qdisc_dump_ignore(new)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001418 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001419 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 goto err_out;
1421 }
1422
1423 if (skb->len)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001424 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001425 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426
1427err_out:
1428 kfree_skb(skb);
1429 return -EINVAL;
1430}
1431
David S. Miller30723672008-07-18 22:50:15 -07001432static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1433 struct netlink_callback *cb,
1434 int *q_idx_p, int s_q_idx)
1435{
1436 int ret = 0, q_idx = *q_idx_p;
1437 struct Qdisc *q;
1438
1439 if (!root)
1440 return 0;
1441
1442 q = root;
1443 if (q_idx < s_q_idx) {
1444 q_idx++;
1445 } else {
1446 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001447 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001448 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1449 goto done;
1450 q_idx++;
1451 }
1452 list_for_each_entry(q, &root->list, list) {
1453 if (q_idx < s_q_idx) {
1454 q_idx++;
1455 continue;
1456 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001457 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001458 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001459 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1460 goto done;
1461 q_idx++;
1462 }
1463
1464out:
1465 *q_idx_p = q_idx;
1466 return ret;
1467done:
1468 ret = -1;
1469 goto out;
1470}
1471
/* Netlink dump callback for RTM_GETQDISC: walk all devices in the
 * namespace and dump their root and ingress qdisc hierarchies.
 * cb->args[0]/[1] hold the device and per-device qdisc cursors so an
 * interrupted dump can resume where it left off.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	/* Resume cursors from the previous dump pass. */
	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();
	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Only the device we stopped on keeps its qdisc
		 * cursor; later devices start from 0.
		 */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1512
1513
1514
1515/************************************************
1516 * Traffic classes manipulation. *
1517 ************************************************/
1518
1519
1520
/* tc_ctl_tclass - rtnetlink doit handler for RTM_{NEW,DEL,GET}TCLASS.
 *
 * Resolves the owning qdisc from tcm_parent/tcm_handle, locates the
 * class, then creates/changes, deletes or reports it via the qdisc's
 * class ops.  Returns 0 or a negative errno.
 * NOTE(review): uses __dev_get_by_index(), so the caller is presumably
 * holding RTNL (registered as a doit in pktsched_init) — confirm.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;		/* class cookie from cops->get(), 0 = not found */
	unsigned long new_cl;
	u32 portid;			/* reused: first holds tcm_parent, later the resolved parent classid */
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN in the device's netns. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it actually supports classes. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Unknown class: only NEW with NLM_F_CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			/* NLM_F_EXCL forbids replacing an existing class. */
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or update: cops->change() allocates/updates and may
	 * return a new class cookie in new_cl.
	 */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the reference taken by cops->get() above. */
	if (cl)
		cops->put(q, cl);

	return err;
}
1652
1653
/* tc_fill_tclass - serialize one traffic class into a netlink message.
 *
 * Emits nlmsghdr + tcmsg + TCA_KIND, then the class-specific dump and
 * statistics via the qdisc's class ops.  Returns skb->len on success;
 * on any failure the partially written message is trimmed back to the
 * saved tail and -1 is returned (caller treats the skb as full/invalid).
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point for trimming */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Called repeatedly from dump loops; give the scheduler a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1699
Tom Goff7316ae82010-03-19 15:40:13 +00001700static int tclass_notify(struct net *net, struct sk_buff *oskb,
1701 struct nlmsghdr *n, struct Qdisc *q,
1702 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703{
1704 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001705 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
1707 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1708 if (!skb)
1709 return -ENOBUFS;
1710
Eric W. Biederman15e47302012-09-07 20:12:54 +00001711 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 kfree_skb(skb);
1713 return -EINVAL;
1714 }
1715
Eric W. Biederman15e47302012-09-07 20:12:54 +00001716 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001717 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718}
1719
/* Context passed through the class walker so qdisc_class_dump() can
 * reach the output skb and the netlink dump state.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* must be first: callbacks cast back from &w */
	struct sk_buff *skb;		/* message being filled for userspace */
	struct netlink_callback *cb;	/* netlink dump state (seq, portid, args) */
};
1725
1726static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1727{
1728 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1729
Eric W. Biederman15e47302012-09-07 20:12:54 +00001730 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1732}
1733
David S. Miller30723672008-07-18 22:50:15 -07001734static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1735 struct tcmsg *tcm, struct netlink_callback *cb,
1736 int *t_p, int s_t)
1737{
1738 struct qdisc_dump_args arg;
1739
1740 if (tc_qdisc_dump_ignore(q) ||
1741 *t_p < s_t || !q->ops->cl_ops ||
1742 (tcm->tcm_parent &&
1743 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1744 (*t_p)++;
1745 return 0;
1746 }
1747 if (*t_p > s_t)
1748 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1749 arg.w.fn = qdisc_class_dump;
1750 arg.skb = skb;
1751 arg.cb = cb;
1752 arg.w.stop = 0;
1753 arg.w.skip = cb->args[1];
1754 arg.w.count = 0;
1755 q->ops->cl_ops->walk(q, &arg.w);
1756 cb->args[1] = arg.w.count;
1757 if (arg.w.stop)
1758 return -1;
1759 (*t_p)++;
1760 return 0;
1761}
1762
1763static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1764 struct tcmsg *tcm, struct netlink_callback *cb,
1765 int *t_p, int s_t)
1766{
1767 struct Qdisc *q;
1768
1769 if (!root)
1770 return 0;
1771
1772 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1773 return -1;
1774
1775 list_for_each_entry(q, &root->list, list) {
1776 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1777 return -1;
1778 }
1779
1780 return 0;
1781}
1782
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1784{
David S. Miller02ef22c2012-06-26 21:50:05 -07001785 struct tcmsg *tcm = nlmsg_data(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001786 struct net *net = sock_net(skb->sk);
1787 struct netdev_queue *dev_queue;
1788 struct net_device *dev;
1789 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790
Hong zhi guo573ce262013-03-27 06:47:04 +00001791 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 return 0;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001793 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1794 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 return 0;
1796
1797 s_t = cb->args[0];
1798 t = 0;
1799
Patrick McHardyaf356af2009-09-04 06:41:18 +00001800 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001801 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Eric Dumazet24824a02010-10-02 06:11:55 +00001803 dev_queue = dev_ingress_queue(dev);
1804 if (dev_queue &&
1805 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1806 &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001807 goto done;
1808
1809done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 cb->args[0] = t;
1811
1812 dev_put(dev);
1813 return skb->len;
1814}
1815
/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for protocol match and asks
 * specific classifiers.
 *
 * Returns the first non-negative classifier verdict, or TC_ACT_UNSPEC
 * when no filter matched.  With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY
 * verdict (unless @compat_mode) restarts the scan from the head of the
 * chain, bounded by MAX_REC_LOOP to prevent livelock.
 * NOTE(review): tp->next is read with rcu_dereference_bh(), so this is
 * presumably called inside an RCU-bh read-side section — confirm.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *old_tp = tp;	/* chain head, for reclassify restarts */
	int limit = 0;				/* reclassify-loop guard counter */

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Skip filters bound to a different protocol (ETH_P_ALL
		 * filters match everything).
		 */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	/* Too many reclassify round-trips: log (rate-limited) and drop. */
	if (unlikely(limit++ >= MAX_REC_LOOP)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	/* Restart from the chain head; an action may have rewritten the
	 * packet, so re-read its protocol.
	 */
	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
Cong Wang1e052be2015-03-06 11:47:59 -08001863bool tcf_destroy(struct tcf_proto *tp, bool force)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001864{
Cong Wang1e052be2015-03-06 11:47:59 -08001865 if (tp->ops->destroy(tp, force)) {
1866 module_put(tp->ops->owner);
1867 kfree_rcu(tp, rcu);
1868 return true;
1869 }
1870
1871 return false;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001872}
1873
John Fastabend25d8c0d2014-09-12 20:05:27 -07001874void tcf_destroy_chain(struct tcf_proto __rcu **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001875{
1876 struct tcf_proto *tp;
1877
John Fastabend25d8c0d2014-09-12 20:05:27 -07001878 while ((tp = rtnl_dereference(*fl)) != NULL) {
1879 RCU_INIT_POINTER(*fl, tp->next);
Cong Wang1e052be2015-03-06 11:47:59 -08001880 tcf_destroy(tp, true);
Patrick McHardya48b5a62007-03-23 11:29:43 -07001881 }
1882}
1883EXPORT_SYMBOL(tcf_destroy_chain);
1884
#ifdef CONFIG_PROC_FS
/* Show the scheduler clock parameters exposed as /proc/net/psched:
 * ns-per-usec, ticks-to-ns ratio, a historical constant, and the
 * hrtimer resolution in Hz.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

/* Open handler: single_open() since the whole output fits one show call. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* Per-netns init: create /proc/net/psched in this namespace. */
static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

/* Per-netns teardown: remove the proc entry created above. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
/* No procfs: provide no-op stubs so pernet registration still works. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif
1934
/* Pernet hooks wiring /proc/net/psched to network-namespace lifetime. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1939
/* Subsystem init: register pernet ops, the built-in qdiscs, and the
 * rtnetlink message handlers for qdisc and class manipulation.
 * Returns 0 or the pernet registration error.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs that must always be available. */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* Netlink entry points: doit handlers plus dump handlers for GET. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);