blob: fe35c1f338c2b25d7c2a4e4ae19123bf4023f6b5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
   Qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes"
     using "packet classifiers" (see cls_api.c).
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
/*
 * Guards the list of registered qdisc ops (qdisc_base): taken for
 * writing by register/unregister and for reading by lookups by name.
 * It is a pure SMP lock.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
/*
 * Head of the singly linked list (chained through ->next) of all
 * registered queueing discipline ops.  Protected by qdisc_mod_lock.
 */
static struct Qdisc_ops *qdisc_base;
137
/* Register/unregister a queueing discipline */
139
140int register_qdisc(struct Qdisc_ops *qops)
141{
142 struct Qdisc_ops *q, **qp;
143 int rc = -EEXIST;
144
145 write_lock(&qdisc_mod_lock);
146 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
147 if (!strcmp(qops->id, q->id))
148 goto out;
149
150 if (qops->enqueue == NULL)
151 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700152 if (qops->peek == NULL) {
153 if (qops->dequeue == NULL) {
154 qops->peek = noop_qdisc_ops.peek;
155 } else {
156 rc = -EINVAL;
157 goto out;
158 }
159 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 if (qops->dequeue == NULL)
161 qops->dequeue = noop_qdisc_ops.dequeue;
162
163 qops->next = NULL;
164 *qp = qops;
165 rc = 0;
166out:
167 write_unlock(&qdisc_mod_lock);
168 return rc;
169}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800170EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171
172int unregister_qdisc(struct Qdisc_ops *qops)
173{
174 struct Qdisc_ops *q, **qp;
175 int err = -ENOENT;
176
177 write_lock(&qdisc_mod_lock);
178 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
179 if (q == qops)
180 break;
181 if (q) {
182 *qp = q->next;
183 q->next = NULL;
184 err = 0;
185 }
186 write_unlock(&qdisc_mod_lock);
187 return err;
188}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800189EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
/* The handle is known. Find the matching qdisc among all qdiscs attached
   to the device (the root qdisc, all its children, their children, etc.)
 */
194
Hannes Eder6113b742008-11-28 03:06:46 -0800195static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700196{
197 struct Qdisc *q;
198
199 if (!(root->flags & TCQ_F_BUILTIN) &&
200 root->handle == handle)
201 return root;
202
203 list_for_each_entry(q, &root->list, list) {
204 if (q->handle == handle)
205 return q;
206 }
207 return NULL;
208}
209
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700210static void qdisc_list_add(struct Qdisc *q)
211{
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800212 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
Patrick McHardyaf356af2009-09-04 06:41:18 +0000213 list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700214}
215
216void qdisc_list_del(struct Qdisc *q)
217{
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800218 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700219 list_del(&q->list);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700220}
221EXPORT_SYMBOL(qdisc_list_del);
222
David S. Milleread81cc2008-07-17 00:50:32 -0700223struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800224{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700225 struct Qdisc *q;
226
Patrick McHardyaf356af2009-09-04 06:41:18 +0000227 q = qdisc_match_from_root(dev->qdisc, handle);
228 if (q)
229 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700230
231 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800232out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700233 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800234}
235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
237{
238 unsigned long cl;
239 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800240 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241
242 if (cops == NULL)
243 return NULL;
244 cl = cops->get(p, classid);
245
246 if (cl == 0)
247 return NULL;
248 leaf = cops->leaf(p, cl);
249 cops->put(p, cl);
250 return leaf;
251}
252
253/* Find queueing discipline by name */
254
Patrick McHardy1e904742008-01-22 22:11:17 -0800255static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256{
257 struct Qdisc_ops *q = NULL;
258
259 if (kind) {
260 read_lock(&qdisc_mod_lock);
261 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800262 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 if (!try_module_get(q->owner))
264 q = NULL;
265 break;
266 }
267 }
268 read_unlock(&qdisc_mod_lock);
269 }
270 return q;
271}
272
273static struct qdisc_rate_table *qdisc_rtab_list;
274
Patrick McHardy1e904742008-01-22 22:11:17 -0800275struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276{
277 struct qdisc_rate_table *rtab;
278
279 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
280 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
281 rtab->refcnt++;
282 return rtab;
283 }
284 }
285
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800286 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
287 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 return NULL;
289
290 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
291 if (rtab) {
292 rtab->rate = *r;
293 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800294 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 rtab->next = qdisc_rtab_list;
296 qdisc_rtab_list = rtab;
297 }
298 return rtab;
299}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800300EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301
302void qdisc_put_rtab(struct qdisc_rate_table *tab)
303{
304 struct qdisc_rate_table *rtab, **rtabp;
305
306 if (!tab || --tab->refcnt)
307 return;
308
309 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
310 if (rtab == tab) {
311 *rtabp = rtab->next;
312 kfree(rtab);
313 return;
314 }
315 }
316}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800317EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700319static LIST_HEAD(qdisc_stab_list);
320static DEFINE_SPINLOCK(qdisc_stab_lock);
321
322static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
323 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
324 [TCA_STAB_DATA] = { .type = NLA_BINARY },
325};
326
327static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
328{
329 struct nlattr *tb[TCA_STAB_MAX + 1];
330 struct qdisc_size_table *stab;
331 struct tc_sizespec *s;
332 unsigned int tsize = 0;
333 u16 *tab = NULL;
334 int err;
335
336 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
337 if (err < 0)
338 return ERR_PTR(err);
339 if (!tb[TCA_STAB_BASE])
340 return ERR_PTR(-EINVAL);
341
342 s = nla_data(tb[TCA_STAB_BASE]);
343
344 if (s->tsize > 0) {
345 if (!tb[TCA_STAB_DATA])
346 return ERR_PTR(-EINVAL);
347 tab = nla_data(tb[TCA_STAB_DATA]);
348 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
349 }
350
351 if (!s || tsize != s->tsize || (!tab && tsize > 0))
352 return ERR_PTR(-EINVAL);
353
David S. Millerf3b96052008-08-18 22:33:05 -0700354 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700355
356 list_for_each_entry(stab, &qdisc_stab_list, list) {
357 if (memcmp(&stab->szopts, s, sizeof(*s)))
358 continue;
359 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
360 continue;
361 stab->refcnt++;
David S. Millerf3b96052008-08-18 22:33:05 -0700362 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700363 return stab;
364 }
365
David S. Millerf3b96052008-08-18 22:33:05 -0700366 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700367
368 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
369 if (!stab)
370 return ERR_PTR(-ENOMEM);
371
372 stab->refcnt = 1;
373 stab->szopts = *s;
374 if (tsize > 0)
375 memcpy(stab->data, tab, tsize * sizeof(u16));
376
David S. Millerf3b96052008-08-18 22:33:05 -0700377 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700378 list_add_tail(&stab->list, &qdisc_stab_list);
David S. Millerf3b96052008-08-18 22:33:05 -0700379 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700380
381 return stab;
382}
383
384void qdisc_put_stab(struct qdisc_size_table *tab)
385{
386 if (!tab)
387 return;
388
David S. Millerf3b96052008-08-18 22:33:05 -0700389 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700390
391 if (--tab->refcnt == 0) {
392 list_del(&tab->list);
393 kfree(tab);
394 }
395
David S. Millerf3b96052008-08-18 22:33:05 -0700396 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700397}
398EXPORT_SYMBOL(qdisc_put_stab);
399
400static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
401{
402 struct nlattr *nest;
403
404 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800405 if (nest == NULL)
406 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700407 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
408 nla_nest_end(skb, nest);
409
410 return skb->len;
411
412nla_put_failure:
413 return -1;
414}
415
416void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
417{
418 int pkt_len, slot;
419
420 pkt_len = skb->len + stab->szopts.overhead;
421 if (unlikely(!stab->szopts.tsize))
422 goto out;
423
424 slot = pkt_len + stab->szopts.cell_align;
425 if (unlikely(slot < 0))
426 slot = 0;
427
428 slot >>= stab->szopts.cell_log;
429 if (likely(slot < stab->szopts.tsize))
430 pkt_len = stab->data[slot];
431 else
432 pkt_len = stab->data[stab->szopts.tsize - 1] *
433 (slot / stab->szopts.tsize) +
434 stab->data[slot % stab->szopts.tsize];
435
436 pkt_len <<= stab->szopts.size_log;
437out:
438 if (unlikely(pkt_len < 1))
439 pkt_len = 1;
440 qdisc_skb_cb(skb)->pkt_len = pkt_len;
441}
442EXPORT_SYMBOL(qdisc_calculate_pkt_len);
443
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800444void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
445{
446 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
447 printk(KERN_WARNING
448 "%s: %s qdisc %X: is non-work-conserving?\n",
449 txt, qdisc->ops->id, qdisc->handle >> 16);
450 qdisc->flags |= TCQ_F_WARN_NONWC;
451 }
452}
453EXPORT_SYMBOL(qdisc_warn_nonwc);
454
Patrick McHardy41794772007-03-16 01:19:15 -0700455static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
456{
457 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700458 timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700459
460 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
David S. Miller8608db02008-08-18 20:51:18 -0700461 __netif_schedule(qdisc_root(wd->qdisc));
Stephen Hemminger19365022007-03-22 12:18:35 -0700462
Patrick McHardy41794772007-03-16 01:19:15 -0700463 return HRTIMER_NORESTART;
464}
465
466void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
467{
David S. Miller2fbd3da2009-09-01 17:59:25 -0700468 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
469 wd->timer.function = qdisc_watchdog;
Patrick McHardy41794772007-03-16 01:19:15 -0700470 wd->qdisc = qdisc;
471}
472EXPORT_SYMBOL(qdisc_watchdog_init);
473
474void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
475{
476 ktime_t time;
477
Jarek Poplawski2540e052008-08-21 05:11:14 -0700478 if (test_bit(__QDISC_STATE_DEACTIVATED,
479 &qdisc_root_sleeping(wd->qdisc)->state))
480 return;
481
Patrick McHardy41794772007-03-16 01:19:15 -0700482 wd->qdisc->flags |= TCQ_F_THROTTLED;
483 time = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700484 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700485 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
Patrick McHardy41794772007-03-16 01:19:15 -0700486}
487EXPORT_SYMBOL(qdisc_watchdog_schedule);
488
489void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
490{
David S. Miller2fbd3da2009-09-01 17:59:25 -0700491 hrtimer_cancel(&wd->timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700492 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
493}
494EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
Adrian Bunka94f7792008-07-22 14:20:11 -0700496static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700497{
498 unsigned int size = n * sizeof(struct hlist_head), i;
499 struct hlist_head *h;
500
501 if (size <= PAGE_SIZE)
502 h = kmalloc(size, GFP_KERNEL);
503 else
504 h = (struct hlist_head *)
505 __get_free_pages(GFP_KERNEL, get_order(size));
506
507 if (h != NULL) {
508 for (i = 0; i < n; i++)
509 INIT_HLIST_HEAD(&h[i]);
510 }
511 return h;
512}
513
514static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
515{
516 unsigned int size = n * sizeof(struct hlist_head);
517
518 if (size <= PAGE_SIZE)
519 kfree(h);
520 else
521 free_pages((unsigned long)h, get_order(size));
522}
523
524void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
525{
526 struct Qdisc_class_common *cl;
527 struct hlist_node *n, *next;
528 struct hlist_head *nhash, *ohash;
529 unsigned int nsize, nmask, osize;
530 unsigned int i, h;
531
532 /* Rehash when load factor exceeds 0.75 */
533 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
534 return;
535 nsize = clhash->hashsize * 2;
536 nmask = nsize - 1;
537 nhash = qdisc_class_hash_alloc(nsize);
538 if (nhash == NULL)
539 return;
540
541 ohash = clhash->hash;
542 osize = clhash->hashsize;
543
544 sch_tree_lock(sch);
545 for (i = 0; i < osize; i++) {
546 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
547 h = qdisc_class_hash(cl->classid, nmask);
548 hlist_add_head(&cl->hnode, &nhash[h]);
549 }
550 }
551 clhash->hash = nhash;
552 clhash->hashsize = nsize;
553 clhash->hashmask = nmask;
554 sch_tree_unlock(sch);
555
556 qdisc_class_hash_free(ohash, osize);
557}
558EXPORT_SYMBOL(qdisc_class_hash_grow);
559
560int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
561{
562 unsigned int size = 4;
563
564 clhash->hash = qdisc_class_hash_alloc(size);
565 if (clhash->hash == NULL)
566 return -ENOMEM;
567 clhash->hashsize = size;
568 clhash->hashmask = size - 1;
569 clhash->hashelems = 0;
570 return 0;
571}
572EXPORT_SYMBOL(qdisc_class_hash_init);
573
574void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
575{
576 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
577}
578EXPORT_SYMBOL(qdisc_class_hash_destroy);
579
580void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
581 struct Qdisc_class_common *cl)
582{
583 unsigned int h;
584
585 INIT_HLIST_NODE(&cl->hnode);
586 h = qdisc_class_hash(cl->classid, clhash->hashmask);
587 hlist_add_head(&cl->hnode, &clhash->hash[h]);
588 clhash->hashelems++;
589}
590EXPORT_SYMBOL(qdisc_class_hash_insert);
591
592void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
593 struct Qdisc_class_common *cl)
594{
595 hlist_del(&cl->hnode);
596 clhash->hashelems--;
597}
598EXPORT_SYMBOL(qdisc_class_hash_remove);
599
/* Allocate a unique handle from the space managed by the kernel */
601
602static u32 qdisc_alloc_handle(struct net_device *dev)
603{
604 int i = 0x10000;
605 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
606
607 do {
608 autohandle += TC_H_MAKE(0x10000U, 0);
609 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
610 autohandle = TC_H_MAKE(0x80000000U, 0);
611 } while (qdisc_lookup(dev, autohandle) && --i > 0);
612
613 return i>0 ? autohandle : 0;
614}
615
Patrick McHardy43effa12006-11-29 17:35:48 -0800616void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
617{
Eric Dumazet20fea082007-11-14 01:44:41 -0800618 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800619 unsigned long cl;
620 u32 parentid;
621
622 if (n == 0)
623 return;
624 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700625 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
626 return;
627
David S. Miller5ce2d482008-07-08 17:06:30 -0700628 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700629 if (sch == NULL) {
630 WARN_ON(parentid != TC_H_ROOT);
631 return;
632 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800633 cops = sch->ops->cl_ops;
634 if (cops->qlen_notify) {
635 cl = cops->get(sch, parentid);
636 cops->qlen_notify(sch, cl);
637 cops->put(sch, cl);
638 }
639 sch->q.qlen -= n;
640 }
641}
642EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
Tom Goff7316ae82010-03-19 15:40:13 +0000644static void notify_and_destroy(struct net *net, struct sk_buff *skb,
645 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700646 struct Qdisc *old, struct Qdisc *new)
647{
648 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000649 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
David S. Miller4d8863a2008-08-18 21:03:15 -0700651 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700652 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700653}
654
655/* Graft qdisc "new" to class "classid" of qdisc "parent" or
656 * to device "dev".
657 *
658 * When appropriate send a netlink notification using 'skb'
659 * and "n".
660 *
661 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 */
663
664static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
David S. Miller99194cf2008-07-17 04:54:10 -0700665 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
666 struct Qdisc *new, struct Qdisc *old)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667{
David S. Miller99194cf2008-07-17 04:54:10 -0700668 struct Qdisc *q = old;
Tom Goff7316ae82010-03-19 15:40:13 +0000669 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900672 if (parent == NULL) {
David S. Miller99194cf2008-07-17 04:54:10 -0700673 unsigned int i, num_q, ingress;
674
675 ingress = 0;
676 num_q = dev->num_tx_queues;
David S. Miller8d50b532008-07-30 02:37:46 -0700677 if ((q && q->flags & TCQ_F_INGRESS) ||
678 (new && new->flags & TCQ_F_INGRESS)) {
David S. Miller99194cf2008-07-17 04:54:10 -0700679 num_q = 1;
680 ingress = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 }
David S. Miller99194cf2008-07-17 04:54:10 -0700682
683 if (dev->flags & IFF_UP)
684 dev_deactivate(dev);
685
David S. Miller6ec1c692009-09-06 01:58:51 -0700686 if (new && new->ops->attach) {
687 new->ops->attach(new);
688 num_q = 0;
689 }
690
David S. Miller99194cf2008-07-17 04:54:10 -0700691 for (i = 0; i < num_q; i++) {
692 struct netdev_queue *dev_queue = &dev->rx_queue;
693
694 if (!ingress)
695 dev_queue = netdev_get_tx_queue(dev, i);
696
David S. Miller8d50b532008-07-30 02:37:46 -0700697 old = dev_graft_qdisc(dev_queue, new);
698 if (new && i > 0)
699 atomic_inc(&new->refcnt);
700
Jarek Poplawski036d6a62009-09-13 22:35:44 +0000701 if (!ingress)
702 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700703 }
704
Jarek Poplawski036d6a62009-09-13 22:35:44 +0000705 if (!ingress) {
Tom Goff7316ae82010-03-19 15:40:13 +0000706 notify_and_destroy(net, skb, n, classid,
707 dev->qdisc, new);
Jarek Poplawski036d6a62009-09-13 22:35:44 +0000708 if (new && !new->ops->attach)
709 atomic_inc(&new->refcnt);
710 dev->qdisc = new ? : &noop_qdisc;
711 } else {
Tom Goff7316ae82010-03-19 15:40:13 +0000712 notify_and_destroy(net, skb, n, classid, old, new);
Jarek Poplawski036d6a62009-09-13 22:35:44 +0000713 }
Patrick McHardyaf356af2009-09-04 06:41:18 +0000714
David S. Miller99194cf2008-07-17 04:54:10 -0700715 if (dev->flags & IFF_UP)
716 dev_activate(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800718 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719
Patrick McHardyc9f1d032009-09-04 06:41:13 +0000720 err = -EOPNOTSUPP;
721 if (cops && cops->graft) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 unsigned long cl = cops->get(parent, classid);
723 if (cl) {
David S. Miller99194cf2008-07-17 04:54:10 -0700724 err = cops->graft(parent, cl, new, &old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 cops->put(parent, cl);
Patrick McHardyc9f1d032009-09-04 06:41:13 +0000726 } else
727 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 }
David S. Miller99194cf2008-07-17 04:54:10 -0700729 if (!err)
Tom Goff7316ae82010-03-19 15:40:13 +0000730 notify_and_destroy(net, skb, n, classid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 }
732 return err;
733}
734
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700735/* lockdep annotation is needed for ingress; egress gets it only for name */
736static struct lock_class_key qdisc_tx_lock;
737static struct lock_class_key qdisc_rx_lock;
738
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739/*
740 Allocate and initialize new qdisc.
741
742 Parameters are passed via opt.
743 */
744
745static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700746qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
Patrick McHardy23bcf632009-09-09 18:11:23 -0700747 struct Qdisc *p, u32 parent, u32 handle,
748 struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749{
750 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800751 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 struct Qdisc *sch;
753 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700754 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755
756 ops = qdisc_lookup_ops(kind);
Johannes Berg95a5afc2008-10-16 15:24:51 -0700757#ifdef CONFIG_MODULES
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 if (ops == NULL && kind != NULL) {
759 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800760 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 /* We dropped the RTNL semaphore in order to
762 * perform the module load. So, even if we
763 * succeeded in loading the module we have to
764 * tell the caller to replay the request. We
765 * indicate this using -EAGAIN.
766 * We replay the request because the device may
767 * go away in the mean time.
768 */
769 rtnl_unlock();
770 request_module("sch_%s", name);
771 rtnl_lock();
772 ops = qdisc_lookup_ops(kind);
773 if (ops != NULL) {
774 /* We will try again qdisc_lookup_ops,
775 * so don't keep a reference.
776 */
777 module_put(ops->owner);
778 err = -EAGAIN;
779 goto err_out;
780 }
781 }
782 }
783#endif
784
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700785 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 if (ops == NULL)
787 goto err_out;
788
David S. Miller5ce2d482008-07-08 17:06:30 -0700789 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700790 if (IS_ERR(sch)) {
791 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700793 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700795 sch->parent = parent;
796
Thomas Graf3d54b822005-07-05 14:15:09 -0700797 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700799 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700800 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700801 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700802 if (handle == 0) {
803 handle = qdisc_alloc_handle(dev);
804 err = -ENOMEM;
805 if (handle == 0)
806 goto err_out3;
807 }
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700808 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 }
810
Thomas Graf3d54b822005-07-05 14:15:09 -0700811 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812
Patrick McHardy1e904742008-01-22 22:11:17 -0800813 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700814 if (tca[TCA_STAB]) {
815 stab = qdisc_get_stab(tca[TCA_STAB]);
816 if (IS_ERR(stab)) {
817 err = PTR_ERR(stab);
Jarek Poplawski7c64b9f2009-09-15 23:42:05 -0700818 goto err_out4;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700819 }
820 sch->stab = stab;
821 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800822 if (tca[TCA_RATE]) {
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700823 spinlock_t *root_lock;
824
Patrick McHardy23bcf632009-09-09 18:11:23 -0700825 err = -EOPNOTSUPP;
826 if (sch->flags & TCQ_F_MQROOT)
827 goto err_out4;
828
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700829 if ((sch->parent != TC_H_ROOT) &&
Patrick McHardy23bcf632009-09-09 18:11:23 -0700830 !(sch->flags & TCQ_F_INGRESS) &&
831 (!p || !(p->flags & TCQ_F_MQROOT)))
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700832 root_lock = qdisc_root_sleeping_lock(sch);
833 else
834 root_lock = qdisc_lock(sch);
835
Thomas Graf023e09a2005-07-05 14:15:53 -0700836 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700837 root_lock, tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700838 if (err)
839 goto err_out4;
Thomas Graf023e09a2005-07-05 14:15:53 -0700840 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700841
842 qdisc_list_add(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 return sch;
845 }
846err_out3:
847 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700848 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849err_out2:
850 module_put(ops->owner);
851err_out:
852 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 return NULL;
Patrick McHardy23bcf632009-09-09 18:11:23 -0700854
855err_out4:
856 /*
857 * Any broken qdiscs that would require a ops->reset() here?
858 * The qdisc was never in action so it shouldn't be necessary.
859 */
Jarek Poplawski7c64b9f2009-09-15 23:42:05 -0700860 qdisc_put_stab(sch->stab);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700861 if (ops->destroy)
862 ops->destroy(sch);
863 goto err_out3;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864}
865
Patrick McHardy1e904742008-01-22 22:11:17 -0800866static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700868 struct qdisc_size_table *stab = NULL;
869 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700871 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 if (sch->ops->change == NULL)
873 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800874 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 if (err)
876 return err;
877 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700878
879 if (tca[TCA_STAB]) {
880 stab = qdisc_get_stab(tca[TCA_STAB]);
881 if (IS_ERR(stab))
882 return PTR_ERR(stab);
883 }
884
885 qdisc_put_stab(sch->stab);
886 sch->stab = stab;
887
Patrick McHardy23bcf632009-09-09 18:11:23 -0700888 if (tca[TCA_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -0800889 /* NB: ignores errors from replace_estimator
890 because change can't be undone. */
Patrick McHardy23bcf632009-09-09 18:11:23 -0700891 if (sch->flags & TCQ_F_MQROOT)
892 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 gen_replace_estimator(&sch->bstats, &sch->rate_est,
Stephen Hemminger71bcb092008-11-25 21:13:31 -0800894 qdisc_root_sleeping_lock(sch),
895 tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700896 }
897out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 return 0;
899}
900
901struct check_loop_arg
902{
903 struct qdisc_walker w;
904 struct Qdisc *p;
905 int depth;
906};
907
908static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
909
910static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
911{
912 struct check_loop_arg arg;
913
914 if (q->ops->cl_ops == NULL)
915 return 0;
916
917 arg.w.stop = arg.w.skip = arg.w.count = 0;
918 arg.w.fn = check_loop_fn;
919 arg.depth = depth;
920 arg.p = p;
921 q->ops->cl_ops->walk(q, &arg.w);
922 return arg.w.stop ? -ELOOP : 0;
923}
924
925static int
926check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
927{
928 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800929 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 struct check_loop_arg *arg = (struct check_loop_arg *)w;
931
932 leaf = cops->leaf(q, cl);
933 if (leaf) {
934 if (leaf == arg->p || arg->depth > 7)
935 return -ELOOP;
936 return check_loop(leaf, arg->p, arg->depth + 1);
937 }
938 return 0;
939}
940
941/*
942 * Delete/get qdisc.
943 */
944
945static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
946{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900947 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800949 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 struct net_device *dev;
951 u32 clid = tcm->tcm_parent;
952 struct Qdisc *q = NULL;
953 struct Qdisc *p = NULL;
954 int err;
955
Tom Goff7316ae82010-03-19 15:40:13 +0000956 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 return -ENODEV;
958
Patrick McHardy1e904742008-01-22 22:11:17 -0800959 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
960 if (err < 0)
961 return err;
962
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 if (clid) {
964 if (clid != TC_H_ROOT) {
965 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
966 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
967 return -ENOENT;
968 q = qdisc_leaf(p, clid);
969 } else { /* ingress */
David S. Miller8123b422008-08-08 23:23:39 -0700970 q = dev->rx_queue.qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900971 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +0000973 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 }
975 if (!q)
976 return -ENOENT;
977
978 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
979 return -EINVAL;
980 } else {
981 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
982 return -ENOENT;
983 }
984
Patrick McHardy1e904742008-01-22 22:11:17 -0800985 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 return -EINVAL;
987
988 if (n->nlmsg_type == RTM_DELQDISC) {
989 if (!clid)
990 return -EINVAL;
991 if (q->handle == 0)
992 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -0700993 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 } else {
Tom Goff7316ae82010-03-19 15:40:13 +0000996 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 }
998 return 0;
999}
1000
1001/*
1002 Create/change qdisc.
1003 */
1004
1005static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1006{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001007 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001009 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 struct net_device *dev;
1011 u32 clid;
1012 struct Qdisc *q, *p;
1013 int err;
1014
1015replay:
1016 /* Reinit, just in case something touches this. */
1017 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 clid = tcm->tcm_parent;
1019 q = p = NULL;
1020
Tom Goff7316ae82010-03-19 15:40:13 +00001021 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 return -ENODEV;
1023
Patrick McHardy1e904742008-01-22 22:11:17 -08001024 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1025 if (err < 0)
1026 return err;
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 if (clid) {
1029 if (clid != TC_H_ROOT) {
1030 if (clid != TC_H_INGRESS) {
1031 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1032 return -ENOENT;
1033 q = qdisc_leaf(p, clid);
1034 } else { /*ingress */
David S. Miller8123b422008-08-08 23:23:39 -07001035 q = dev->rx_queue.qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 }
1037 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001038 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 }
1040
1041 /* It may be default qdisc, ignore it */
1042 if (q && q->handle == 0)
1043 q = NULL;
1044
1045 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1046 if (tcm->tcm_handle) {
1047 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1048 return -EEXIST;
1049 if (TC_H_MIN(tcm->tcm_handle))
1050 return -EINVAL;
1051 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1052 goto create_n_graft;
1053 if (n->nlmsg_flags&NLM_F_EXCL)
1054 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001055 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 return -EINVAL;
1057 if (q == p ||
1058 (p && check_loop(q, p, 0)))
1059 return -ELOOP;
1060 atomic_inc(&q->refcnt);
1061 goto graft;
1062 } else {
1063 if (q == NULL)
1064 goto create_n_graft;
1065
1066 /* This magic test requires explanation.
1067 *
1068 * We know, that some child q is already
1069 * attached to this parent and have choice:
1070 * either to change it or to create/graft new one.
1071 *
1072 * 1. We are allowed to create/graft only
1073 * if CREATE and REPLACE flags are set.
1074 *
1075 * 2. If EXCL is set, requestor wanted to say,
1076 * that qdisc tcm_handle is not expected
1077 * to exist, so that we choose create/graft too.
1078 *
1079 * 3. The last case is when no flags are set.
1080 * Alas, it is sort of hole in API, we
1081 * cannot decide what to do unambiguously.
1082 * For now we select create/graft, if
1083 * user gave KIND, which does not match existing.
1084 */
1085 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1086 (n->nlmsg_flags&NLM_F_REPLACE) &&
1087 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001088 (tca[TCA_KIND] &&
1089 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 goto create_n_graft;
1091 }
1092 }
1093 } else {
1094 if (!tcm->tcm_handle)
1095 return -EINVAL;
1096 q = qdisc_lookup(dev, tcm->tcm_handle);
1097 }
1098
1099 /* Change qdisc parameters */
1100 if (q == NULL)
1101 return -ENOENT;
1102 if (n->nlmsg_flags&NLM_F_EXCL)
1103 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001104 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 return -EINVAL;
1106 err = qdisc_change(q, tca);
1107 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001108 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 return err;
1110
1111create_n_graft:
1112 if (!(n->nlmsg_flags&NLM_F_CREATE))
1113 return -ENOENT;
1114 if (clid == TC_H_INGRESS)
Patrick McHardy23bcf632009-09-09 18:11:23 -07001115 q = qdisc_create(dev, &dev->rx_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001116 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001117 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001118 else {
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001119 struct netdev_queue *dev_queue;
David S. Miller6ec1c692009-09-06 01:58:51 -07001120
1121 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001122 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1123 else if (p)
1124 dev_queue = p->dev_queue;
1125 else
1126 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller6ec1c692009-09-06 01:58:51 -07001127
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001128 q = qdisc_create(dev, dev_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001129 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001130 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001131 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 if (q == NULL) {
1133 if (err == -EAGAIN)
1134 goto replay;
1135 return err;
1136 }
1137
1138graft:
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001139 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1140 if (err) {
1141 if (q)
1142 qdisc_destroy(q);
1143 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001145
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 return 0;
1147}
1148
1149static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001150 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151{
1152 struct tcmsg *tcm;
1153 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001154 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 struct gnet_dump d;
1156
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001157 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 tcm = NLMSG_DATA(nlh);
1159 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001160 tcm->tcm__pad1 = 0;
1161 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001162 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 tcm->tcm_parent = clid;
1164 tcm->tcm_handle = q->handle;
1165 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001166 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001168 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 q->qstats.qlen = q->q.qlen;
1170
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001171 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1172 goto nla_put_failure;
1173
Jarek Poplawski102396a2008-08-29 14:21:52 -07001174 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1175 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001176 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177
1178 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001179 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Eric Dumazetd250a5f2009-10-02 10:32:18 +00001182 gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001184 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001185
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001187 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001188
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001189 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 return skb->len;
1191
1192nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001193nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001194 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 return -1;
1196}
1197
Tom Goff7316ae82010-03-19 15:40:13 +00001198static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1199 struct nlmsghdr *n, u32 clid,
1200 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201{
1202 struct sk_buff *skb;
1203 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1204
1205 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1206 if (!skb)
1207 return -ENOBUFS;
1208
1209 if (old && old->handle) {
1210 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1211 goto err_out;
1212 }
1213 if (new) {
1214 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1215 goto err_out;
1216 }
1217
1218 if (skb->len)
Tom Goff7316ae82010-03-19 15:40:13 +00001219 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220
1221err_out:
1222 kfree_skb(skb);
1223 return -EINVAL;
1224}
1225
David S. Miller30723672008-07-18 22:50:15 -07001226static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1227{
1228 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1229}
1230
1231static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1232 struct netlink_callback *cb,
1233 int *q_idx_p, int s_q_idx)
1234{
1235 int ret = 0, q_idx = *q_idx_p;
1236 struct Qdisc *q;
1237
1238 if (!root)
1239 return 0;
1240
1241 q = root;
1242 if (q_idx < s_q_idx) {
1243 q_idx++;
1244 } else {
1245 if (!tc_qdisc_dump_ignore(q) &&
1246 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1247 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1248 goto done;
1249 q_idx++;
1250 }
1251 list_for_each_entry(q, &root->list, list) {
1252 if (q_idx < s_q_idx) {
1253 q_idx++;
1254 continue;
1255 }
1256 if (!tc_qdisc_dump_ignore(q) &&
1257 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1258 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1259 goto done;
1260 q_idx++;
1261 }
1262
1263out:
1264 *q_idx_p = q_idx;
1265 return ret;
1266done:
1267 ret = -1;
1268 goto out;
1269}
1270
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1272{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001273 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 int idx, q_idx;
1275 int s_idx, s_q_idx;
1276 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277
1278 s_idx = cb->args[0];
1279 s_q_idx = q_idx = cb->args[1];
stephen hemmingerf1e90162009-11-10 07:54:49 +00001280
1281 rcu_read_lock();
Pavel Emelianov7562f872007-05-03 15:13:45 -07001282 idx = 0;
Tom Goff7316ae82010-03-19 15:40:13 +00001283 for_each_netdev_rcu(net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001284 struct netdev_queue *dev_queue;
1285
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001287 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 if (idx > s_idx)
1289 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001291
Patrick McHardyaf356af2009-09-04 06:41:18 +00001292 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001293 goto done;
1294
1295 dev_queue = &dev->rx_queue;
David S. Miller827ebd62008-08-07 20:26:40 -07001296 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001297 goto done;
1298
Pavel Emelianov7562f872007-05-03 15:13:45 -07001299cont:
1300 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302
1303done:
stephen hemmingerf1e90162009-11-10 07:54:49 +00001304 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
1306 cb->args[0] = idx;
1307 cb->args[1] = q_idx;
1308
1309 return skb->len;
1310}
1311
1312
1313
1314/************************************************
1315 * Traffic classes manipulation. *
1316 ************************************************/
1317
1318
1319
1320static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1321{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001322 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001324 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 struct net_device *dev;
1326 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001327 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 unsigned long cl = 0;
1329 unsigned long new_cl;
1330 u32 pid = tcm->tcm_parent;
1331 u32 clid = tcm->tcm_handle;
1332 u32 qid = TC_H_MAJ(clid);
1333 int err;
1334
Tom Goff7316ae82010-03-19 15:40:13 +00001335 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 return -ENODEV;
1337
Patrick McHardy1e904742008-01-22 22:11:17 -08001338 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1339 if (err < 0)
1340 return err;
1341
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 /*
1343 parent == TC_H_UNSPEC - unspecified parent.
1344 parent == TC_H_ROOT - class is root, which has no parent.
1345 parent == X:0 - parent is root class.
1346 parent == X:Y - parent is a node in hierarchy.
1347 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1348
1349 handle == 0:0 - generate handle from kernel pool.
1350 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1351 handle == X:Y - clear.
1352 handle == X:0 - root class.
1353 */
1354
1355 /* Step 1. Determine qdisc handle X:0 */
1356
1357 if (pid != TC_H_ROOT) {
1358 u32 qid1 = TC_H_MAJ(pid);
1359
1360 if (qid && qid1) {
1361 /* If both majors are known, they must be identical. */
1362 if (qid != qid1)
1363 return -EINVAL;
1364 } else if (qid1) {
1365 qid = qid1;
1366 } else if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001367 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368
1369 /* Now qid is genuine qdisc handle consistent
1370 both with parent and child.
1371
1372 TC_H_MAJ(pid) still may be unspecified, complete it now.
1373 */
1374 if (pid)
1375 pid = TC_H_MAKE(qid, pid);
1376 } else {
1377 if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001378 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 }
1380
1381 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001382 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 return -ENOENT;
1384
1385 /* An check that it supports classes */
1386 cops = q->ops->cl_ops;
1387 if (cops == NULL)
1388 return -EINVAL;
1389
1390 /* Now try to get class */
1391 if (clid == 0) {
1392 if (pid == TC_H_ROOT)
1393 clid = qid;
1394 } else
1395 clid = TC_H_MAKE(qid, clid);
1396
1397 if (clid)
1398 cl = cops->get(q, clid);
1399
1400 if (cl == 0) {
1401 err = -ENOENT;
1402 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1403 goto out;
1404 } else {
1405 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001406 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 err = -EEXIST;
1408 if (n->nlmsg_flags&NLM_F_EXCL)
1409 goto out;
1410 break;
1411 case RTM_DELTCLASS:
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001412 err = -EOPNOTSUPP;
1413 if (cops->delete)
1414 err = cops->delete(q, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001416 tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 goto out;
1418 case RTM_GETTCLASS:
Tom Goff7316ae82010-03-19 15:40:13 +00001419 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 goto out;
1421 default:
1422 err = -EINVAL;
1423 goto out;
1424 }
1425 }
1426
1427 new_cl = cl;
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001428 err = -EOPNOTSUPP;
1429 if (cops->change)
1430 err = cops->change(q, clid, pid, tca, &new_cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001432 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433
1434out:
1435 if (cl)
1436 cops->put(q, cl);
1437
1438 return err;
1439}
1440
1441
1442static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1443 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001444 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445{
1446 struct tcmsg *tcm;
1447 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001448 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001450 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001452 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 tcm = NLMSG_DATA(nlh);
1454 tcm->tcm_family = AF_UNSPEC;
Eric Dumazet16ebb5e2009-09-02 02:40:09 +00001455 tcm->tcm__pad1 = 0;
1456 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001457 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 tcm->tcm_parent = q->handle;
1459 tcm->tcm_handle = q->handle;
1460 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001461 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001463 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464
Jarek Poplawski102396a2008-08-29 14:21:52 -07001465 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1466 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001467 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
1469 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001470 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471
1472 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001473 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001475 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 return skb->len;
1477
1478nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001479nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001480 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 return -1;
1482}
1483
Tom Goff7316ae82010-03-19 15:40:13 +00001484static int tclass_notify(struct net *net, struct sk_buff *oskb,
1485 struct nlmsghdr *n, struct Qdisc *q,
1486 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487{
1488 struct sk_buff *skb;
1489 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1490
1491 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1492 if (!skb)
1493 return -ENOBUFS;
1494
1495 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1496 kfree_skb(skb);
1497 return -EINVAL;
1498 }
1499
Tom Goff7316ae82010-03-19 15:40:13 +00001500 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501}
1502
1503struct qdisc_dump_args
1504{
1505 struct qdisc_walker w;
1506 struct sk_buff *skb;
1507 struct netlink_callback *cb;
1508};
1509
1510static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1511{
1512 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1513
1514 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1515 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1516}
1517
David S. Miller30723672008-07-18 22:50:15 -07001518static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1519 struct tcmsg *tcm, struct netlink_callback *cb,
1520 int *t_p, int s_t)
1521{
1522 struct qdisc_dump_args arg;
1523
1524 if (tc_qdisc_dump_ignore(q) ||
1525 *t_p < s_t || !q->ops->cl_ops ||
1526 (tcm->tcm_parent &&
1527 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1528 (*t_p)++;
1529 return 0;
1530 }
1531 if (*t_p > s_t)
1532 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1533 arg.w.fn = qdisc_class_dump;
1534 arg.skb = skb;
1535 arg.cb = cb;
1536 arg.w.stop = 0;
1537 arg.w.skip = cb->args[1];
1538 arg.w.count = 0;
1539 q->ops->cl_ops->walk(q, &arg.w);
1540 cb->args[1] = arg.w.count;
1541 if (arg.w.stop)
1542 return -1;
1543 (*t_p)++;
1544 return 0;
1545}
1546
1547static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1548 struct tcmsg *tcm, struct netlink_callback *cb,
1549 int *t_p, int s_t)
1550{
1551 struct Qdisc *q;
1552
1553 if (!root)
1554 return 0;
1555
1556 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1557 return -1;
1558
1559 list_for_each_entry(q, &root->list, list) {
1560 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1561 return -1;
1562 }
1563
1564 return 0;
1565}
1566
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1568{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001570 struct net *net = sock_net(skb->sk);
1571 struct netdev_queue *dev_queue;
1572 struct net_device *dev;
1573 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
1575 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1576 return 0;
Tom Goff7316ae82010-03-19 15:40:13 +00001577 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 return 0;
1579
1580 s_t = cb->args[0];
1581 t = 0;
1582
Patrick McHardyaf356af2009-09-04 06:41:18 +00001583 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001584 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585
David S. Miller30723672008-07-18 22:50:15 -07001586 dev_queue = &dev->rx_queue;
David S. Miller8123b422008-08-08 23:23:39 -07001587 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001588 goto done;
1589
1590done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 cb->args[0] = t;
1592
1593 dev_put(dev);
1594 return skb->len;
1595}
1596
1597/* Main classifier routine: scans classifier chain attached
1598 to this qdisc, (optionally) tests for protocol and asks
1599 specific classifiers.
1600 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001601int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1602 struct tcf_result *res)
1603{
1604 __be16 protocol = skb->protocol;
1605 int err = 0;
1606
1607 for (; tp; tp = tp->next) {
1608 if ((tp->protocol == protocol ||
1609 tp->protocol == htons(ETH_P_ALL)) &&
1610 (err = tp->classify(skb, tp, res)) >= 0) {
1611#ifdef CONFIG_NET_CLS_ACT
1612 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1613 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1614#endif
1615 return err;
1616 }
1617 }
1618 return -1;
1619}
1620EXPORT_SYMBOL(tc_classify_compat);
1621
/*
 * Classify 'skb' against the classifier chain starting at 'tp'.
 *
 * Thin wrapper around tc_classify_compat().  With CONFIG_NET_CLS_ACT, a
 * TC_ACT_RECLASSIFY result restarts classification from the head of the
 * chain, counting the restarts in skb->tc_verd; once the count reaches
 * MAX_REC_LOOP a rate-limited notice is logged and TC_ACT_SHOT is
 * returned.
 *
 * Returns the classification result of tc_classify_compat(), or
 * TC_ACT_SHOT when a reclassify loop is detected.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);

		/* Restart from the head of the chain. */
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			if (net_ratelimit())
				printk(KERN_NOTICE
				       "%s: packet reclassify loop"
					  " rule prio %u protocol %02x\n",
				       tp->q->ops->id,
				       tp->prio & 0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655
Patrick McHardya48b5a62007-03-23 11:29:43 -07001656void tcf_destroy(struct tcf_proto *tp)
1657{
1658 tp->ops->destroy(tp);
1659 module_put(tp->ops->owner);
1660 kfree(tp);
1661}
1662
Patrick McHardyff31ab52008-07-01 19:52:38 -07001663void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001664{
1665 struct tcf_proto *tp;
1666
Patrick McHardyff31ab52008-07-01 19:52:38 -07001667 while ((tp = *fl) != NULL) {
1668 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001669 tcf_destroy(tp);
1670 }
1671}
1672EXPORT_SYMBOL(tcf_destroy_chain);
1673
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674#ifdef CONFIG_PROC_FS
1675static int psched_show(struct seq_file *seq, void *v)
1676{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001677 struct timespec ts;
1678
1679 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07001681 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001682 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001683 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684
1685 return 0;
1686}
1687
1688static int psched_open(struct inode *inode, struct file *file)
1689{
Tom Goff7e5ab152010-03-30 19:44:56 -07001690 return single_open(file, psched_show, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691}
1692
Arjan van de Venda7071d2007-02-12 00:55:36 -08001693static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 .owner = THIS_MODULE,
1695 .open = psched_open,
1696 .read = seq_read,
1697 .llseek = seq_lseek,
1698 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001699};
Tom Goff7316ae82010-03-19 15:40:13 +00001700
1701static int __net_init psched_net_init(struct net *net)
1702{
1703 struct proc_dir_entry *e;
1704
1705 e = proc_net_fops_create(net, "psched", 0, &psched_fops);
1706 if (e == NULL)
1707 return -ENOMEM;
1708
1709 return 0;
1710}
1711
1712static void __net_exit psched_net_exit(struct net *net)
1713{
1714 proc_net_remove(net, "psched");
Tom Goff7316ae82010-03-19 15:40:13 +00001715}
1716#else
1717static int __net_init psched_net_init(struct net *net)
1718{
1719 return 0;
1720}
1721
1722static void __net_exit psched_net_exit(struct net *net)
1723{
1724}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725#endif
1726
Tom Goff7316ae82010-03-19 15:40:13 +00001727static struct pernet_operations psched_net_ops = {
1728 .init = psched_net_init,
1729 .exit = psched_net_exit,
1730};
1731
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732static int __init pktsched_init(void)
1733{
Tom Goff7316ae82010-03-19 15:40:13 +00001734 int err;
1735
1736 err = register_pernet_subsys(&psched_net_ops);
1737 if (err) {
1738 printk(KERN_ERR "pktsched_init: "
1739 "cannot initialize per netns operations\n");
1740 return err;
1741 }
1742
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 register_qdisc(&pfifo_qdisc_ops);
1744 register_qdisc(&bfifo_qdisc_ops);
Hagen Paul Pfeifer57dbb2d2010-01-24 12:30:59 +00001745 register_qdisc(&pfifo_head_drop_qdisc_ops);
David S. Miller6ec1c692009-09-06 01:58:51 -07001746 register_qdisc(&mq_qdisc_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747
Thomas Grafbe577dd2007-03-22 11:55:50 -07001748 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1749 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1750 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1751 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1752 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1753 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1754
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 return 0;
1756}
1757
1758subsys_initcall(pktsched_init);