blob: 692d9a41cd23e714f77eda461323962e11c3a7b8 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020032#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110033#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070034#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <net/pkt_sched.h>
36
Linus Torvalds1da177e2005-04-16 15:20:36 -070037static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
42/*
43
44 Short review.
45 -------------
46
47 This file consists of two interrelated parts:
48
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
51
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
56
57 qdisc's are divided to two categories:
58 - "queues", which have no internal structure visible from outside.
59 - "schedulers", which split all the packets to "traffic classes",
60 using "packet classifiers" (look at cls_api.c)
61
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
64
65 The goal of the routines in this file is to translate
66 information supplied by user in the form of handles
67 to more intelligible for kernel form, to make some sanity
68 checks and part of work, which is common to all qdiscs
69 and to provide rtnetlink notifications.
70
71 All real intelligent work is done inside qdisc modules.
72
73
74
75 Every discipline has two major routines: enqueue and dequeue.
76
77 ---dequeue
78
79 dequeue usually returns a skb to send. It is allowed to return NULL,
80 but it does not mean that queue is empty, it just means that
81 discipline does not want to send anything this time.
82 Queue is really empty if q->q.qlen == 0.
83 For complicated disciplines with multiple queues q->q is not
84 real packet queue, but however q->q.qlen must be valid.
85
86 ---enqueue
87
88 enqueue returns 0, if packet was enqueued successfully.
89 If packet (this one or another one) was dropped, it returns
90 not zero error code.
91 NET_XMIT_DROP - this packet dropped
92 Expected action: do not backoff, but wait until queue will clear.
93 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
94 Expected action: backoff or ignore
95 NET_XMIT_POLICED - dropped by police.
96 Expected action: backoff or error to real-time apps.
97
98 Auxiliary routines:
99
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700100 ---peek
101
102 like dequeue but without removing a packet from the queue
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
135/* Register/uregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700149 if (qops->peek == NULL) {
150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
155 }
156 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
159
160 qops->next = NULL;
161 *qp = qops;
162 rc = 0;
163out:
164 write_unlock(&qdisc_mod_lock);
165 return rc;
166}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800167EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
169int unregister_qdisc(struct Qdisc_ops *qops)
170{
171 struct Qdisc_ops *q, **qp;
172 int err = -ENOENT;
173
174 write_lock(&qdisc_mod_lock);
175 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
176 if (q == qops)
177 break;
178 if (q) {
179 *qp = q->next;
180 q->next = NULL;
181 err = 0;
182 }
183 write_unlock(&qdisc_mod_lock);
184 return err;
185}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800186EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
188/* We know handle. Find qdisc among all qdisc's attached to device
189 (root qdisc, all its children, children of children etc.)
190 */
191
Hannes Eder6113b742008-11-28 03:06:46 -0800192static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700193{
194 struct Qdisc *q;
195
196 if (!(root->flags & TCQ_F_BUILTIN) &&
197 root->handle == handle)
198 return root;
199
200 list_for_each_entry(q, &root->list, list) {
201 if (q->handle == handle)
202 return q;
203 }
204 return NULL;
205}
206
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700207static void qdisc_list_add(struct Qdisc *q)
208{
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800209 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
Patrick McHardyaf356af2009-09-04 06:41:18 +0000210 list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700211}
212
213void qdisc_list_del(struct Qdisc *q)
214{
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800215 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700216 list_del(&q->list);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700217}
218EXPORT_SYMBOL(qdisc_list_del);
219
David S. Milleread81cc2008-07-17 00:50:32 -0700220struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800221{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700222 struct Qdisc *q;
223
Patrick McHardyaf356af2009-09-04 06:41:18 +0000224 q = qdisc_match_from_root(dev->qdisc, handle);
225 if (q)
226 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700227
228 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800229out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700230 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800231}
232
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
234{
235 unsigned long cl;
236 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800237 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
239 if (cops == NULL)
240 return NULL;
241 cl = cops->get(p, classid);
242
243 if (cl == 0)
244 return NULL;
245 leaf = cops->leaf(p, cl);
246 cops->put(p, cl);
247 return leaf;
248}
249
250/* Find queueing discipline by name */
251
Patrick McHardy1e904742008-01-22 22:11:17 -0800252static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
254 struct Qdisc_ops *q = NULL;
255
256 if (kind) {
257 read_lock(&qdisc_mod_lock);
258 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800259 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 if (!try_module_get(q->owner))
261 q = NULL;
262 break;
263 }
264 }
265 read_unlock(&qdisc_mod_lock);
266 }
267 return q;
268}
269
270static struct qdisc_rate_table *qdisc_rtab_list;
271
Patrick McHardy1e904742008-01-22 22:11:17 -0800272struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273{
274 struct qdisc_rate_table *rtab;
275
276 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
277 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
278 rtab->refcnt++;
279 return rtab;
280 }
281 }
282
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800283 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
284 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 return NULL;
286
287 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
288 if (rtab) {
289 rtab->rate = *r;
290 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800291 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 rtab->next = qdisc_rtab_list;
293 qdisc_rtab_list = rtab;
294 }
295 return rtab;
296}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800297EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
299void qdisc_put_rtab(struct qdisc_rate_table *tab)
300{
301 struct qdisc_rate_table *rtab, **rtabp;
302
303 if (!tab || --tab->refcnt)
304 return;
305
306 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
307 if (rtab == tab) {
308 *rtabp = rtab->next;
309 kfree(rtab);
310 return;
311 }
312 }
313}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800314EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700316static LIST_HEAD(qdisc_stab_list);
317static DEFINE_SPINLOCK(qdisc_stab_lock);
318
319static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
320 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
321 [TCA_STAB_DATA] = { .type = NLA_BINARY },
322};
323
324static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
325{
326 struct nlattr *tb[TCA_STAB_MAX + 1];
327 struct qdisc_size_table *stab;
328 struct tc_sizespec *s;
329 unsigned int tsize = 0;
330 u16 *tab = NULL;
331 int err;
332
333 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
334 if (err < 0)
335 return ERR_PTR(err);
336 if (!tb[TCA_STAB_BASE])
337 return ERR_PTR(-EINVAL);
338
339 s = nla_data(tb[TCA_STAB_BASE]);
340
341 if (s->tsize > 0) {
342 if (!tb[TCA_STAB_DATA])
343 return ERR_PTR(-EINVAL);
344 tab = nla_data(tb[TCA_STAB_DATA]);
345 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
346 }
347
348 if (!s || tsize != s->tsize || (!tab && tsize > 0))
349 return ERR_PTR(-EINVAL);
350
David S. Millerf3b96052008-08-18 22:33:05 -0700351 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700352
353 list_for_each_entry(stab, &qdisc_stab_list, list) {
354 if (memcmp(&stab->szopts, s, sizeof(*s)))
355 continue;
356 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
357 continue;
358 stab->refcnt++;
David S. Millerf3b96052008-08-18 22:33:05 -0700359 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700360 return stab;
361 }
362
David S. Millerf3b96052008-08-18 22:33:05 -0700363 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700364
365 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
366 if (!stab)
367 return ERR_PTR(-ENOMEM);
368
369 stab->refcnt = 1;
370 stab->szopts = *s;
371 if (tsize > 0)
372 memcpy(stab->data, tab, tsize * sizeof(u16));
373
David S. Millerf3b96052008-08-18 22:33:05 -0700374 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700375 list_add_tail(&stab->list, &qdisc_stab_list);
David S. Millerf3b96052008-08-18 22:33:05 -0700376 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700377
378 return stab;
379}
380
381void qdisc_put_stab(struct qdisc_size_table *tab)
382{
383 if (!tab)
384 return;
385
David S. Millerf3b96052008-08-18 22:33:05 -0700386 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700387
388 if (--tab->refcnt == 0) {
389 list_del(&tab->list);
390 kfree(tab);
391 }
392
David S. Millerf3b96052008-08-18 22:33:05 -0700393 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700394}
395EXPORT_SYMBOL(qdisc_put_stab);
396
397static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
398{
399 struct nlattr *nest;
400
401 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800402 if (nest == NULL)
403 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700404 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
405 nla_nest_end(skb, nest);
406
407 return skb->len;
408
409nla_put_failure:
410 return -1;
411}
412
413void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
414{
415 int pkt_len, slot;
416
417 pkt_len = skb->len + stab->szopts.overhead;
418 if (unlikely(!stab->szopts.tsize))
419 goto out;
420
421 slot = pkt_len + stab->szopts.cell_align;
422 if (unlikely(slot < 0))
423 slot = 0;
424
425 slot >>= stab->szopts.cell_log;
426 if (likely(slot < stab->szopts.tsize))
427 pkt_len = stab->data[slot];
428 else
429 pkt_len = stab->data[stab->szopts.tsize - 1] *
430 (slot / stab->szopts.tsize) +
431 stab->data[slot % stab->szopts.tsize];
432
433 pkt_len <<= stab->szopts.size_log;
434out:
435 if (unlikely(pkt_len < 1))
436 pkt_len = 1;
437 qdisc_skb_cb(skb)->pkt_len = pkt_len;
438}
439EXPORT_SYMBOL(qdisc_calculate_pkt_len);
440
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800441void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
442{
443 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
444 printk(KERN_WARNING
445 "%s: %s qdisc %X: is non-work-conserving?\n",
446 txt, qdisc->ops->id, qdisc->handle >> 16);
447 qdisc->flags |= TCQ_F_WARN_NONWC;
448 }
449}
450EXPORT_SYMBOL(qdisc_warn_nonwc);
451
Patrick McHardy41794772007-03-16 01:19:15 -0700452static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
453{
454 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700455 timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700456
457 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
David S. Miller8608db02008-08-18 20:51:18 -0700458 __netif_schedule(qdisc_root(wd->qdisc));
Stephen Hemminger19365022007-03-22 12:18:35 -0700459
Patrick McHardy41794772007-03-16 01:19:15 -0700460 return HRTIMER_NORESTART;
461}
462
463void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
464{
David S. Miller2fbd3da2009-09-01 17:59:25 -0700465 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
466 wd->timer.function = qdisc_watchdog;
Patrick McHardy41794772007-03-16 01:19:15 -0700467 wd->qdisc = qdisc;
468}
469EXPORT_SYMBOL(qdisc_watchdog_init);
470
471void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
472{
473 ktime_t time;
474
Jarek Poplawski2540e052008-08-21 05:11:14 -0700475 if (test_bit(__QDISC_STATE_DEACTIVATED,
476 &qdisc_root_sleeping(wd->qdisc)->state))
477 return;
478
Patrick McHardy41794772007-03-16 01:19:15 -0700479 wd->qdisc->flags |= TCQ_F_THROTTLED;
480 time = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700481 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700482 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
Patrick McHardy41794772007-03-16 01:19:15 -0700483}
484EXPORT_SYMBOL(qdisc_watchdog_schedule);
485
486void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
487{
David S. Miller2fbd3da2009-09-01 17:59:25 -0700488 hrtimer_cancel(&wd->timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700489 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
490}
491EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
Adrian Bunka94f7792008-07-22 14:20:11 -0700493static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700494{
495 unsigned int size = n * sizeof(struct hlist_head), i;
496 struct hlist_head *h;
497
498 if (size <= PAGE_SIZE)
499 h = kmalloc(size, GFP_KERNEL);
500 else
501 h = (struct hlist_head *)
502 __get_free_pages(GFP_KERNEL, get_order(size));
503
504 if (h != NULL) {
505 for (i = 0; i < n; i++)
506 INIT_HLIST_HEAD(&h[i]);
507 }
508 return h;
509}
510
511static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
512{
513 unsigned int size = n * sizeof(struct hlist_head);
514
515 if (size <= PAGE_SIZE)
516 kfree(h);
517 else
518 free_pages((unsigned long)h, get_order(size));
519}
520
521void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
522{
523 struct Qdisc_class_common *cl;
524 struct hlist_node *n, *next;
525 struct hlist_head *nhash, *ohash;
526 unsigned int nsize, nmask, osize;
527 unsigned int i, h;
528
529 /* Rehash when load factor exceeds 0.75 */
530 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
531 return;
532 nsize = clhash->hashsize * 2;
533 nmask = nsize - 1;
534 nhash = qdisc_class_hash_alloc(nsize);
535 if (nhash == NULL)
536 return;
537
538 ohash = clhash->hash;
539 osize = clhash->hashsize;
540
541 sch_tree_lock(sch);
542 for (i = 0; i < osize; i++) {
543 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
544 h = qdisc_class_hash(cl->classid, nmask);
545 hlist_add_head(&cl->hnode, &nhash[h]);
546 }
547 }
548 clhash->hash = nhash;
549 clhash->hashsize = nsize;
550 clhash->hashmask = nmask;
551 sch_tree_unlock(sch);
552
553 qdisc_class_hash_free(ohash, osize);
554}
555EXPORT_SYMBOL(qdisc_class_hash_grow);
556
557int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
558{
559 unsigned int size = 4;
560
561 clhash->hash = qdisc_class_hash_alloc(size);
562 if (clhash->hash == NULL)
563 return -ENOMEM;
564 clhash->hashsize = size;
565 clhash->hashmask = size - 1;
566 clhash->hashelems = 0;
567 return 0;
568}
569EXPORT_SYMBOL(qdisc_class_hash_init);
570
571void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
572{
573 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
574}
575EXPORT_SYMBOL(qdisc_class_hash_destroy);
576
577void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
578 struct Qdisc_class_common *cl)
579{
580 unsigned int h;
581
582 INIT_HLIST_NODE(&cl->hnode);
583 h = qdisc_class_hash(cl->classid, clhash->hashmask);
584 hlist_add_head(&cl->hnode, &clhash->hash[h]);
585 clhash->hashelems++;
586}
587EXPORT_SYMBOL(qdisc_class_hash_insert);
588
589void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
590 struct Qdisc_class_common *cl)
591{
592 hlist_del(&cl->hnode);
593 clhash->hashelems--;
594}
595EXPORT_SYMBOL(qdisc_class_hash_remove);
596
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597/* Allocate an unique handle from space managed by kernel */
598
599static u32 qdisc_alloc_handle(struct net_device *dev)
600{
601 int i = 0x10000;
602 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
603
604 do {
605 autohandle += TC_H_MAKE(0x10000U, 0);
606 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
607 autohandle = TC_H_MAKE(0x80000000U, 0);
608 } while (qdisc_lookup(dev, autohandle) && --i > 0);
609
610 return i>0 ? autohandle : 0;
611}
612
Patrick McHardy43effa12006-11-29 17:35:48 -0800613void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
614{
Eric Dumazet20fea082007-11-14 01:44:41 -0800615 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800616 unsigned long cl;
617 u32 parentid;
618
619 if (n == 0)
620 return;
621 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700622 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
623 return;
624
David S. Miller5ce2d482008-07-08 17:06:30 -0700625 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700626 if (sch == NULL) {
627 WARN_ON(parentid != TC_H_ROOT);
628 return;
629 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800630 cops = sch->ops->cl_ops;
631 if (cops->qlen_notify) {
632 cl = cops->get(sch, parentid);
633 cops->qlen_notify(sch, cl);
634 cops->put(sch, cl);
635 }
636 sch->q.qlen -= n;
637 }
638}
639EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
David S. Miller99194cf2008-07-17 04:54:10 -0700641static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
642 struct Qdisc *old, struct Qdisc *new)
643{
644 if (new || old)
645 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
David S. Miller4d8863a2008-08-18 21:03:15 -0700647 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700648 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700649}
650
651/* Graft qdisc "new" to class "classid" of qdisc "parent" or
652 * to device "dev".
653 *
654 * When appropriate send a netlink notification using 'skb'
655 * and "n".
656 *
657 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 */
659
660static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
David S. Miller99194cf2008-07-17 04:54:10 -0700661 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
662 struct Qdisc *new, struct Qdisc *old)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663{
David S. Miller99194cf2008-07-17 04:54:10 -0700664 struct Qdisc *q = old;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900667 if (parent == NULL) {
David S. Miller99194cf2008-07-17 04:54:10 -0700668 unsigned int i, num_q, ingress;
669
670 ingress = 0;
671 num_q = dev->num_tx_queues;
David S. Miller8d50b532008-07-30 02:37:46 -0700672 if ((q && q->flags & TCQ_F_INGRESS) ||
673 (new && new->flags & TCQ_F_INGRESS)) {
David S. Miller99194cf2008-07-17 04:54:10 -0700674 num_q = 1;
675 ingress = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 }
David S. Miller99194cf2008-07-17 04:54:10 -0700677
678 if (dev->flags & IFF_UP)
679 dev_deactivate(dev);
680
David S. Miller6ec1c692009-09-06 01:58:51 -0700681 if (new && new->ops->attach) {
682 new->ops->attach(new);
683 num_q = 0;
684 }
685
David S. Miller99194cf2008-07-17 04:54:10 -0700686 for (i = 0; i < num_q; i++) {
687 struct netdev_queue *dev_queue = &dev->rx_queue;
688
689 if (!ingress)
690 dev_queue = netdev_get_tx_queue(dev, i);
691
David S. Miller8d50b532008-07-30 02:37:46 -0700692 old = dev_graft_qdisc(dev_queue, new);
693 if (new && i > 0)
694 atomic_inc(&new->refcnt);
695
Patrick McHardyaf356af2009-09-04 06:41:18 +0000696 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700697 }
698
Patrick McHardyaf356af2009-09-04 06:41:18 +0000699 notify_and_destroy(skb, n, classid, dev->qdisc, new);
David S. Miller6ec1c692009-09-06 01:58:51 -0700700 if (new && !new->ops->attach)
Patrick McHardyaf356af2009-09-04 06:41:18 +0000701 atomic_inc(&new->refcnt);
702 dev->qdisc = new ? : &noop_qdisc;
703
David S. Miller99194cf2008-07-17 04:54:10 -0700704 if (dev->flags & IFF_UP)
705 dev_activate(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800707 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
Patrick McHardyc9f1d032009-09-04 06:41:13 +0000709 err = -EOPNOTSUPP;
710 if (cops && cops->graft) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711 unsigned long cl = cops->get(parent, classid);
712 if (cl) {
David S. Miller99194cf2008-07-17 04:54:10 -0700713 err = cops->graft(parent, cl, new, &old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 cops->put(parent, cl);
Patrick McHardyc9f1d032009-09-04 06:41:13 +0000715 } else
716 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 }
David S. Miller99194cf2008-07-17 04:54:10 -0700718 if (!err)
719 notify_and_destroy(skb, n, classid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 }
721 return err;
722}
723
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700724/* lockdep annotation is needed for ingress; egress gets it only for name */
725static struct lock_class_key qdisc_tx_lock;
726static struct lock_class_key qdisc_rx_lock;
727
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728/*
729 Allocate and initialize new qdisc.
730
731 Parameters are passed via opt.
732 */
733
734static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700735qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
Patrick McHardy23bcf632009-09-09 18:11:23 -0700736 struct Qdisc *p, u32 parent, u32 handle,
737 struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738{
739 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800740 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 struct Qdisc *sch;
742 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700743 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
745 ops = qdisc_lookup_ops(kind);
Johannes Berg95a5afc2008-10-16 15:24:51 -0700746#ifdef CONFIG_MODULES
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 if (ops == NULL && kind != NULL) {
748 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800749 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 /* We dropped the RTNL semaphore in order to
751 * perform the module load. So, even if we
752 * succeeded in loading the module we have to
753 * tell the caller to replay the request. We
754 * indicate this using -EAGAIN.
755 * We replay the request because the device may
756 * go away in the mean time.
757 */
758 rtnl_unlock();
759 request_module("sch_%s", name);
760 rtnl_lock();
761 ops = qdisc_lookup_ops(kind);
762 if (ops != NULL) {
763 /* We will try again qdisc_lookup_ops,
764 * so don't keep a reference.
765 */
766 module_put(ops->owner);
767 err = -EAGAIN;
768 goto err_out;
769 }
770 }
771 }
772#endif
773
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700774 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 if (ops == NULL)
776 goto err_out;
777
David S. Miller5ce2d482008-07-08 17:06:30 -0700778 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700779 if (IS_ERR(sch)) {
780 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700782 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700784 sch->parent = parent;
785
Thomas Graf3d54b822005-07-05 14:15:09 -0700786 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700788 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700789 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700790 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700791 if (handle == 0) {
792 handle = qdisc_alloc_handle(dev);
793 err = -ENOMEM;
794 if (handle == 0)
795 goto err_out3;
796 }
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700797 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 }
799
Thomas Graf3d54b822005-07-05 14:15:09 -0700800 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801
Patrick McHardy1e904742008-01-22 22:11:17 -0800802 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700803 if (tca[TCA_STAB]) {
804 stab = qdisc_get_stab(tca[TCA_STAB]);
805 if (IS_ERR(stab)) {
806 err = PTR_ERR(stab);
807 goto err_out3;
808 }
809 sch->stab = stab;
810 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800811 if (tca[TCA_RATE]) {
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700812 spinlock_t *root_lock;
813
Patrick McHardy23bcf632009-09-09 18:11:23 -0700814 err = -EOPNOTSUPP;
815 if (sch->flags & TCQ_F_MQROOT)
816 goto err_out4;
817
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700818 if ((sch->parent != TC_H_ROOT) &&
Patrick McHardy23bcf632009-09-09 18:11:23 -0700819 !(sch->flags & TCQ_F_INGRESS) &&
820 (!p || !(p->flags & TCQ_F_MQROOT)))
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700821 root_lock = qdisc_root_sleeping_lock(sch);
822 else
823 root_lock = qdisc_lock(sch);
824
Thomas Graf023e09a2005-07-05 14:15:53 -0700825 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700826 root_lock, tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700827 if (err)
828 goto err_out4;
Thomas Graf023e09a2005-07-05 14:15:53 -0700829 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700830
831 qdisc_list_add(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 return sch;
834 }
835err_out3:
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700836 qdisc_put_stab(sch->stab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700838 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839err_out2:
840 module_put(ops->owner);
841err_out:
842 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 return NULL;
Patrick McHardy23bcf632009-09-09 18:11:23 -0700844
845err_out4:
846 /*
847 * Any broken qdiscs that would require a ops->reset() here?
848 * The qdisc was never in action so it shouldn't be necessary.
849 */
850 if (ops->destroy)
851 ops->destroy(sch);
852 goto err_out3;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853}
854
Patrick McHardy1e904742008-01-22 22:11:17 -0800855static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700857 struct qdisc_size_table *stab = NULL;
858 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700860 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 if (sch->ops->change == NULL)
862 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800863 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 if (err)
865 return err;
866 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700867
868 if (tca[TCA_STAB]) {
869 stab = qdisc_get_stab(tca[TCA_STAB]);
870 if (IS_ERR(stab))
871 return PTR_ERR(stab);
872 }
873
874 qdisc_put_stab(sch->stab);
875 sch->stab = stab;
876
Patrick McHardy23bcf632009-09-09 18:11:23 -0700877 if (tca[TCA_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -0800878 /* NB: ignores errors from replace_estimator
879 because change can't be undone. */
Patrick McHardy23bcf632009-09-09 18:11:23 -0700880 if (sch->flags & TCQ_F_MQROOT)
881 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 gen_replace_estimator(&sch->bstats, &sch->rate_est,
Stephen Hemminger71bcb092008-11-25 21:13:31 -0800883 qdisc_root_sleeping_lock(sch),
884 tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700885 }
886out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887 return 0;
888}
889
890struct check_loop_arg
891{
892 struct qdisc_walker w;
893 struct Qdisc *p;
894 int depth;
895};
896
897static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
898
899static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
900{
901 struct check_loop_arg arg;
902
903 if (q->ops->cl_ops == NULL)
904 return 0;
905
906 arg.w.stop = arg.w.skip = arg.w.count = 0;
907 arg.w.fn = check_loop_fn;
908 arg.depth = depth;
909 arg.p = p;
910 q->ops->cl_ops->walk(q, &arg.w);
911 return arg.w.stop ? -ELOOP : 0;
912}
913
914static int
915check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
916{
917 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800918 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 struct check_loop_arg *arg = (struct check_loop_arg *)w;
920
921 leaf = cops->leaf(q, cl);
922 if (leaf) {
923 if (leaf == arg->p || arg->depth > 7)
924 return -ELOOP;
925 return check_loop(leaf, arg->p, arg->depth + 1);
926 }
927 return 0;
928}
929
930/*
931 * Delete/get qdisc.
932 */
933
934static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
935{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900936 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800938 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 struct net_device *dev;
940 u32 clid = tcm->tcm_parent;
941 struct Qdisc *q = NULL;
942 struct Qdisc *p = NULL;
943 int err;
944
Denis V. Lunevb8542722007-12-01 00:21:31 +1100945 if (net != &init_net)
946 return -EINVAL;
947
Eric W. Biederman881d9662007-09-17 11:56:21 -0700948 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 return -ENODEV;
950
Patrick McHardy1e904742008-01-22 22:11:17 -0800951 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
952 if (err < 0)
953 return err;
954
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 if (clid) {
956 if (clid != TC_H_ROOT) {
957 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
958 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
959 return -ENOENT;
960 q = qdisc_leaf(p, clid);
961 } else { /* ingress */
David S. Miller8123b422008-08-08 23:23:39 -0700962 q = dev->rx_queue.qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900963 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +0000965 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 }
967 if (!q)
968 return -ENOENT;
969
970 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
971 return -EINVAL;
972 } else {
973 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
974 return -ENOENT;
975 }
976
Patrick McHardy1e904742008-01-22 22:11:17 -0800977 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 return -EINVAL;
979
980 if (n->nlmsg_type == RTM_DELQDISC) {
981 if (!clid)
982 return -EINVAL;
983 if (q->handle == 0)
984 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -0700985 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 } else {
988 qdisc_notify(skb, n, clid, NULL, q);
989 }
990 return 0;
991}
992
993/*
994 Create/change qdisc.
995 */
996
997static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
998{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900999 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001001 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 struct net_device *dev;
1003 u32 clid;
1004 struct Qdisc *q, *p;
1005 int err;
1006
Denis V. Lunevb8542722007-12-01 00:21:31 +11001007 if (net != &init_net)
1008 return -EINVAL;
1009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010replay:
1011 /* Reinit, just in case something touches this. */
1012 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 clid = tcm->tcm_parent;
1014 q = p = NULL;
1015
Eric W. Biederman881d9662007-09-17 11:56:21 -07001016 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 return -ENODEV;
1018
Patrick McHardy1e904742008-01-22 22:11:17 -08001019 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1020 if (err < 0)
1021 return err;
1022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 if (clid) {
1024 if (clid != TC_H_ROOT) {
1025 if (clid != TC_H_INGRESS) {
1026 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1027 return -ENOENT;
1028 q = qdisc_leaf(p, clid);
1029 } else { /*ingress */
David S. Miller8123b422008-08-08 23:23:39 -07001030 q = dev->rx_queue.qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 }
1032 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001033 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 }
1035
1036 /* It may be default qdisc, ignore it */
1037 if (q && q->handle == 0)
1038 q = NULL;
1039
1040 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1041 if (tcm->tcm_handle) {
1042 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1043 return -EEXIST;
1044 if (TC_H_MIN(tcm->tcm_handle))
1045 return -EINVAL;
1046 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1047 goto create_n_graft;
1048 if (n->nlmsg_flags&NLM_F_EXCL)
1049 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001050 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051 return -EINVAL;
1052 if (q == p ||
1053 (p && check_loop(q, p, 0)))
1054 return -ELOOP;
1055 atomic_inc(&q->refcnt);
1056 goto graft;
1057 } else {
1058 if (q == NULL)
1059 goto create_n_graft;
1060
1061 /* This magic test requires explanation.
1062 *
1063 * We know, that some child q is already
1064 * attached to this parent and have choice:
1065 * either to change it or to create/graft new one.
1066 *
1067 * 1. We are allowed to create/graft only
1068 * if CREATE and REPLACE flags are set.
1069 *
1070 * 2. If EXCL is set, requestor wanted to say,
1071 * that qdisc tcm_handle is not expected
1072 * to exist, so that we choose create/graft too.
1073 *
1074 * 3. The last case is when no flags are set.
1075 * Alas, it is sort of hole in API, we
1076 * cannot decide what to do unambiguously.
1077 * For now we select create/graft, if
1078 * user gave KIND, which does not match existing.
1079 */
1080 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1081 (n->nlmsg_flags&NLM_F_REPLACE) &&
1082 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001083 (tca[TCA_KIND] &&
1084 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 goto create_n_graft;
1086 }
1087 }
1088 } else {
1089 if (!tcm->tcm_handle)
1090 return -EINVAL;
1091 q = qdisc_lookup(dev, tcm->tcm_handle);
1092 }
1093
1094 /* Change qdisc parameters */
1095 if (q == NULL)
1096 return -ENOENT;
1097 if (n->nlmsg_flags&NLM_F_EXCL)
1098 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001099 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 return -EINVAL;
1101 err = qdisc_change(q, tca);
1102 if (err == 0)
1103 qdisc_notify(skb, n, clid, NULL, q);
1104 return err;
1105
1106create_n_graft:
1107 if (!(n->nlmsg_flags&NLM_F_CREATE))
1108 return -ENOENT;
1109 if (clid == TC_H_INGRESS)
Patrick McHardy23bcf632009-09-09 18:11:23 -07001110 q = qdisc_create(dev, &dev->rx_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001111 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001112 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001113 else {
1114 unsigned int ntx = 0;
1115
1116 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1117 ntx = p->ops->cl_ops->select_queue(p, tcm);
1118
Patrick McHardy23bcf632009-09-09 18:11:23 -07001119 q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001120 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001121 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001122 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 if (q == NULL) {
1124 if (err == -EAGAIN)
1125 goto replay;
1126 return err;
1127 }
1128
1129graft:
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001130 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1131 if (err) {
1132 if (q)
1133 qdisc_destroy(q);
1134 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001136
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 return 0;
1138}
1139
1140static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001141 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142{
1143 struct tcmsg *tcm;
1144 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001145 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 struct gnet_dump d;
1147
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001148 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 tcm = NLMSG_DATA(nlh);
1150 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001151 tcm->tcm__pad1 = 0;
1152 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001153 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 tcm->tcm_parent = clid;
1155 tcm->tcm_handle = q->handle;
1156 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001157 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001159 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 q->qstats.qlen = q->q.qlen;
1161
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001162 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1163 goto nla_put_failure;
1164
Jarek Poplawski102396a2008-08-29 14:21:52 -07001165 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1166 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001167 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
1169 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001170 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
1172 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001175 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001176
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001178 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001179
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001180 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 return skb->len;
1182
1183nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001184nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001185 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 return -1;
1187}
1188
1189static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1190 u32 clid, struct Qdisc *old, struct Qdisc *new)
1191{
1192 struct sk_buff *skb;
1193 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1194
1195 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1196 if (!skb)
1197 return -ENOBUFS;
1198
1199 if (old && old->handle) {
1200 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1201 goto err_out;
1202 }
1203 if (new) {
1204 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1205 goto err_out;
1206 }
1207
1208 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001209 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211err_out:
1212 kfree_skb(skb);
1213 return -EINVAL;
1214}
1215
David S. Miller30723672008-07-18 22:50:15 -07001216static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1217{
1218 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1219}
1220
1221static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1222 struct netlink_callback *cb,
1223 int *q_idx_p, int s_q_idx)
1224{
1225 int ret = 0, q_idx = *q_idx_p;
1226 struct Qdisc *q;
1227
1228 if (!root)
1229 return 0;
1230
1231 q = root;
1232 if (q_idx < s_q_idx) {
1233 q_idx++;
1234 } else {
1235 if (!tc_qdisc_dump_ignore(q) &&
1236 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1237 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1238 goto done;
1239 q_idx++;
1240 }
1241 list_for_each_entry(q, &root->list, list) {
1242 if (q_idx < s_q_idx) {
1243 q_idx++;
1244 continue;
1245 }
1246 if (!tc_qdisc_dump_ignore(q) &&
1247 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1248 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1249 goto done;
1250 q_idx++;
1251 }
1252
1253out:
1254 *q_idx_p = q_idx;
1255 return ret;
1256done:
1257 ret = -1;
1258 goto out;
1259}
1260
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1262{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001263 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 int idx, q_idx;
1265 int s_idx, s_q_idx;
1266 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267
Denis V. Lunevb8542722007-12-01 00:21:31 +11001268 if (net != &init_net)
1269 return 0;
1270
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 s_idx = cb->args[0];
1272 s_q_idx = q_idx = cb->args[1];
1273 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07001274 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001275 for_each_netdev(&init_net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001276 struct netdev_queue *dev_queue;
1277
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001279 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 if (idx > s_idx)
1281 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001283
Patrick McHardyaf356af2009-09-04 06:41:18 +00001284 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001285 goto done;
1286
1287 dev_queue = &dev->rx_queue;
David S. Miller827ebd62008-08-07 20:26:40 -07001288 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001289 goto done;
1290
Pavel Emelianov7562f872007-05-03 15:13:45 -07001291cont:
1292 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 }
1294
1295done:
1296 read_unlock(&dev_base_lock);
1297
1298 cb->args[0] = idx;
1299 cb->args[1] = q_idx;
1300
1301 return skb->len;
1302}
1303
1304
1305
1306/************************************************
1307 * Traffic classes manipulation. *
1308 ************************************************/
1309
1310
1311
1312static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1313{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001314 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001316 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 struct net_device *dev;
1318 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001319 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 unsigned long cl = 0;
1321 unsigned long new_cl;
1322 u32 pid = tcm->tcm_parent;
1323 u32 clid = tcm->tcm_handle;
1324 u32 qid = TC_H_MAJ(clid);
1325 int err;
1326
Denis V. Lunevb8542722007-12-01 00:21:31 +11001327 if (net != &init_net)
1328 return -EINVAL;
1329
Eric W. Biederman881d9662007-09-17 11:56:21 -07001330 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 return -ENODEV;
1332
Patrick McHardy1e904742008-01-22 22:11:17 -08001333 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1334 if (err < 0)
1335 return err;
1336
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 /*
1338 parent == TC_H_UNSPEC - unspecified parent.
1339 parent == TC_H_ROOT - class is root, which has no parent.
1340 parent == X:0 - parent is root class.
1341 parent == X:Y - parent is a node in hierarchy.
1342 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1343
1344 handle == 0:0 - generate handle from kernel pool.
1345 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1346 handle == X:Y - clear.
1347 handle == X:0 - root class.
1348 */
1349
1350 /* Step 1. Determine qdisc handle X:0 */
1351
1352 if (pid != TC_H_ROOT) {
1353 u32 qid1 = TC_H_MAJ(pid);
1354
1355 if (qid && qid1) {
1356 /* If both majors are known, they must be identical. */
1357 if (qid != qid1)
1358 return -EINVAL;
1359 } else if (qid1) {
1360 qid = qid1;
1361 } else if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001362 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
1364 /* Now qid is genuine qdisc handle consistent
1365 both with parent and child.
1366
1367 TC_H_MAJ(pid) still may be unspecified, complete it now.
1368 */
1369 if (pid)
1370 pid = TC_H_MAKE(qid, pid);
1371 } else {
1372 if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001373 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 }
1375
1376 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001377 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 return -ENOENT;
1379
1380 /* An check that it supports classes */
1381 cops = q->ops->cl_ops;
1382 if (cops == NULL)
1383 return -EINVAL;
1384
1385 /* Now try to get class */
1386 if (clid == 0) {
1387 if (pid == TC_H_ROOT)
1388 clid = qid;
1389 } else
1390 clid = TC_H_MAKE(qid, clid);
1391
1392 if (clid)
1393 cl = cops->get(q, clid);
1394
1395 if (cl == 0) {
1396 err = -ENOENT;
1397 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1398 goto out;
1399 } else {
1400 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001401 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 err = -EEXIST;
1403 if (n->nlmsg_flags&NLM_F_EXCL)
1404 goto out;
1405 break;
1406 case RTM_DELTCLASS:
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001407 err = -EOPNOTSUPP;
1408 if (cops->delete)
1409 err = cops->delete(q, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 if (err == 0)
1411 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1412 goto out;
1413 case RTM_GETTCLASS:
1414 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1415 goto out;
1416 default:
1417 err = -EINVAL;
1418 goto out;
1419 }
1420 }
1421
1422 new_cl = cl;
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001423 err = -EOPNOTSUPP;
1424 if (cops->change)
1425 err = cops->change(q, clid, pid, tca, &new_cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 if (err == 0)
1427 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1428
1429out:
1430 if (cl)
1431 cops->put(q, cl);
1432
1433 return err;
1434}
1435
1436
1437static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1438 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001439 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440{
1441 struct tcmsg *tcm;
1442 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001443 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001445 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001447 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 tcm = NLMSG_DATA(nlh);
1449 tcm->tcm_family = AF_UNSPEC;
Eric Dumazet16ebb5e2009-09-02 02:40:09 +00001450 tcm->tcm__pad1 = 0;
1451 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001452 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 tcm->tcm_parent = q->handle;
1454 tcm->tcm_handle = q->handle;
1455 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001456 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001458 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459
Jarek Poplawski102396a2008-08-29 14:21:52 -07001460 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1461 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001462 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463
1464 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001465 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466
1467 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001468 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001470 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return skb->len;
1472
1473nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001474nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001475 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 return -1;
1477}
1478
1479static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1480 struct Qdisc *q, unsigned long cl, int event)
1481{
1482 struct sk_buff *skb;
1483 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1484
1485 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1486 if (!skb)
1487 return -ENOBUFS;
1488
1489 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1490 kfree_skb(skb);
1491 return -EINVAL;
1492 }
1493
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001494 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495}
1496
1497struct qdisc_dump_args
1498{
1499 struct qdisc_walker w;
1500 struct sk_buff *skb;
1501 struct netlink_callback *cb;
1502};
1503
1504static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1505{
1506 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1507
1508 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1509 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1510}
1511
David S. Miller30723672008-07-18 22:50:15 -07001512static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1513 struct tcmsg *tcm, struct netlink_callback *cb,
1514 int *t_p, int s_t)
1515{
1516 struct qdisc_dump_args arg;
1517
1518 if (tc_qdisc_dump_ignore(q) ||
1519 *t_p < s_t || !q->ops->cl_ops ||
1520 (tcm->tcm_parent &&
1521 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1522 (*t_p)++;
1523 return 0;
1524 }
1525 if (*t_p > s_t)
1526 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1527 arg.w.fn = qdisc_class_dump;
1528 arg.skb = skb;
1529 arg.cb = cb;
1530 arg.w.stop = 0;
1531 arg.w.skip = cb->args[1];
1532 arg.w.count = 0;
1533 q->ops->cl_ops->walk(q, &arg.w);
1534 cb->args[1] = arg.w.count;
1535 if (arg.w.stop)
1536 return -1;
1537 (*t_p)++;
1538 return 0;
1539}
1540
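/*
 * Dump the classes of a root qdisc and of every other qdisc in its
 * tree (all qdiscs of a tree are linked on the root's ->list).
 */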
1541static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1542 struct tcmsg *tcm, struct netlink_callback *cb,
1543 int *t_p, int s_t)
1544{
1545 struct Qdisc *q;
1546
1547 if (!root)
1548 return 0;
1549
1550 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1551 return -1;
1552
1553 list_for_each_entry(q, &root->list, list) {
1554 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1555 return -1;
1556 }
1557
1558 return 0;
1559}
1560
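/*
 * RTM_GETTCLASS dump handler: walk the egress qdisc tree and the
 * ingress qdisc of the requested device, resuming from cb->args[0].
 * Only the initial network namespace is handled here.
 */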
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1562{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001564 struct net *net = sock_net(skb->sk);
1565 struct netdev_queue *dev_queue;
1566 struct net_device *dev;
1567 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
Denis V. Lunevb8542722007-12-01 00:21:31 +11001569 if (net != &init_net)
1570 return 0;
1571
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1573 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001574 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 return 0;
1576
1577 s_t = cb->args[0];
1578 t = 0;
1579
Patrick McHardyaf356af2009-09-04 06:41:18 +00001580 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001581 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582
David S. Miller30723672008-07-18 22:50:15 -07001583 dev_queue = &dev->rx_queue;
David S. Miller8123b422008-08-08 23:23:39 -07001584 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001585 goto done;
1586
1587done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 cb->args[0] = t;
1589
1590 dev_put(dev);
1591 return skb->len;
1592}
1593
 1594/* Main classifier routine: scans the classifier chain attached
 1595 to this qdisc, (optionally) tests for a protocol match and asks
 1596 the individual classifiers for a verdict.
 1597 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001598int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1599 struct tcf_result *res)
1600{
1601 __be16 protocol = skb->protocol;
1602 int err = 0;
1603
1604 for (; tp; tp = tp->next) {
1605 if ((tp->protocol == protocol ||
1606 tp->protocol == htons(ETH_P_ALL)) &&
1607 (err = tp->classify(skb, tp, res)) >= 0) {
1608#ifdef CONFIG_NET_CLS_ACT
1609 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1610 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1611#endif
1612 return err;
1613 }
1614 }
1615 return -1;
1616}
1617EXPORT_SYMBOL(tc_classify_compat);
1618
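/*
 * tc_classify() wraps tc_classify_compat(), restarting classification
 * from the first filter when a classifier returns TC_ACT_RECLASSIFY
 * (CONFIG_NET_CLS_ACT only) and giving up with TC_ACT_SHOT after
 * MAX_REC_LOOP rounds.
 *
 * Illustrative sketch (not part of this file): a classful scheduler
 * would typically call it from its ->enqueue() to pick a class,
 * roughly:
 *
 *	struct tcf_result res;
 *	int verdict = tc_classify(skb, q->filter_list, &res);
 *
 * where TC_ACT_SHOT/TC_ACT_STOLEN/TC_ACT_QUEUED tell the caller to drop
 * the packet or treat it as consumed, and res.classid otherwise names
 * the selected class.  The field q->filter_list is an assumption
 * borrowed from schedulers such as sch_prio; every qdisc keeps its own
 * filter list and does its own verdict handling.
 */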
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001620 struct tcf_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621{
1622 int err = 0;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001623 __be16 protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624#ifdef CONFIG_NET_CLS_ACT
1625 struct tcf_proto *otp = tp;
1626reclassify:
1627#endif
1628 protocol = skb->protocol;
1629
Patrick McHardy73ca4912007-07-15 00:02:31 -07001630 err = tc_classify_compat(skb, tp, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy73ca4912007-07-15 00:02:31 -07001632 if (err == TC_ACT_RECLASSIFY) {
1633 u32 verd = G_TC_VERD(skb->tc_verd);
1634 tp = otp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
Patrick McHardy73ca4912007-07-15 00:02:31 -07001636 if (verd++ >= MAX_REC_LOOP) {
1637 printk("rule prio %u protocol %02x reclassify loop, "
1638 "packet dropped\n",
1639 tp->prio&0xffff, ntohs(tp->protocol));
1640 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001642 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
1643 goto reclassify;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001645#endif
1646 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647}
Patrick McHardy73ca4912007-07-15 00:02:31 -07001648EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649
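/* Destroy a single filter: classifier cleanup, module ref drop, kfree. */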
Patrick McHardya48b5a62007-03-23 11:29:43 -07001650void tcf_destroy(struct tcf_proto *tp)
1651{
1652 tp->ops->destroy(tp);
1653 module_put(tp->ops->owner);
1654 kfree(tp);
1655}
1656
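/*
 * Destroy a whole filter chain, unlinking each tcf_proto from the head
 * before freeing it.
 */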
Patrick McHardyff31ab52008-07-01 19:52:38 -07001657void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001658{
1659 struct tcf_proto *tp;
1660
Patrick McHardyff31ab52008-07-01 19:52:38 -07001661 while ((tp = *fl) != NULL) {
1662 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001663 tcf_destroy(tp);
1664 }
1665}
1666EXPORT_SYMBOL(tcf_destroy_chain);
1667
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668#ifdef CONFIG_PROC_FS
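/*
 * /proc/net/psched exports the clock parameters tc uses to convert time
 * units: nanoseconds per microsecond, nanoseconds per psched tick, a
 * constant 1000000 kept for older userspace, and the hrtimer clock
 * resolution expressed as a frequency.
 */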
1669static int psched_show(struct seq_file *seq, void *v)
1670{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001671 struct timespec ts;
1672
1673 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07001675 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001676 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001677 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678
1679 return 0;
1680}
1681
1682static int psched_open(struct inode *inode, struct file *file)
1683{
1684 return single_open(file, psched_show, PDE(inode)->data);
1685}
1686
Arjan van de Venda7071d2007-02-12 00:55:36 -08001687static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 .owner = THIS_MODULE,
1689 .open = psched_open,
1690 .read = seq_read,
1691 .llseek = seq_lseek,
1692 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001693};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694#endif
1695
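/*
 * Register the built-in fifo and mq qdiscs, create /proc/net/psched and
 * hook up the rtnetlink handlers for qdisc and class operations.
 */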
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696static int __init pktsched_init(void)
1697{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 register_qdisc(&pfifo_qdisc_ops);
1699 register_qdisc(&bfifo_qdisc_ops);
David S. Miller6ec1c692009-09-06 01:58:51 -07001700 register_qdisc(&mq_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001701 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702
Thomas Grafbe577dd2007-03-22 11:55:50 -07001703 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1704 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1705 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1706 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1707 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1708 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1709
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 return 0;
1711}
1712
1713subsys_initcall(pktsched_init);