blob: 5219d5f9d75444e4a35482d2e707efcf0af2fe1d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020031#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110032#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070033#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <net/pkt_sched.h>
35
/*
 * Forward declarations of the notification helpers called by the routines
 * below; their definitions are not in this part of the file.
 */
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
   qdiscs are divided into two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
   requeues a packet that was previously dequeued. It is used for
   non-standard or just buggy devices, which can defer output even if
   netif_queue_stopped()=0.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
/*
 * Protects the list of registered TC modules (qdisc_base). It is a pure
 * SMP lock: register_qdisc()/unregister_qdisc() take it for writing,
 * qdisc_lookup_ops() takes it for reading.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
/*
 * The list of all installed queueing disciplines: a singly linked list
 * chained through Qdisc_ops::next, guarded by qdisc_mod_lock.
 */

static struct Qdisc_ops *qdisc_base;
134
135/* Register/uregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800161EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800180EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
David S. Milleread81cc2008-07-17 00:50:32 -0700186struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800187{
David S. Miller30723672008-07-18 22:50:15 -0700188 unsigned int i;
Patrick McHardy43effa12006-11-29 17:35:48 -0800189
David S. Miller30723672008-07-18 22:50:15 -0700190 for (i = 0; i < dev->num_tx_queues; i++) {
191 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
192 struct Qdisc *q, *txq_root = txq->qdisc;
193
194 if (!(txq_root->flags & TCQ_F_BUILTIN) &&
195 txq_root->handle == handle)
196 return txq_root;
197
198 list_for_each_entry(q, &txq_root->list, list) {
199 if (q->handle == handle)
200 return q;
201 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800202 }
203 return NULL;
204}
205
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
207{
208 unsigned long cl;
209 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800210 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
212 if (cops == NULL)
213 return NULL;
214 cl = cops->get(p, classid);
215
216 if (cl == 0)
217 return NULL;
218 leaf = cops->leaf(p, cl);
219 cops->put(p, cl);
220 return leaf;
221}
222
223/* Find queueing discipline by name */
224
Patrick McHardy1e904742008-01-22 22:11:17 -0800225static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226{
227 struct Qdisc_ops *q = NULL;
228
229 if (kind) {
230 read_lock(&qdisc_mod_lock);
231 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800232 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 if (!try_module_get(q->owner))
234 q = NULL;
235 break;
236 }
237 }
238 read_unlock(&qdisc_mod_lock);
239 }
240 return q;
241}
242
/*
 * Head of the singly linked list of reference-counted rate tables shared
 * through qdisc_get_rtab()/qdisc_put_rtab().
 */
static struct qdisc_rate_table *qdisc_rtab_list;
244
Patrick McHardy1e904742008-01-22 22:11:17 -0800245struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246{
247 struct qdisc_rate_table *rtab;
248
249 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
250 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
251 rtab->refcnt++;
252 return rtab;
253 }
254 }
255
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800256 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
257 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 return NULL;
259
260 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
261 if (rtab) {
262 rtab->rate = *r;
263 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800264 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 rtab->next = qdisc_rtab_list;
266 qdisc_rtab_list = rtab;
267 }
268 return rtab;
269}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800270EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
272void qdisc_put_rtab(struct qdisc_rate_table *tab)
273{
274 struct qdisc_rate_table *rtab, **rtabp;
275
276 if (!tab || --tab->refcnt)
277 return;
278
279 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
280 if (rtab == tab) {
281 *rtabp = rtab->next;
282 kfree(rtab);
283 return;
284 }
285 }
286}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800287EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
/* All size tables currently in use; entries are shared and refcounted. */
static LIST_HEAD(qdisc_stab_list);
/* Guards qdisc_stab_list and the refcnt of the tables on it. */
static DEFINE_SPINLOCK(qdisc_stab_lock);

/* Netlink policy used to parse the attributes nested inside TCA_STAB. */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
296
297static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
298{
299 struct nlattr *tb[TCA_STAB_MAX + 1];
300 struct qdisc_size_table *stab;
301 struct tc_sizespec *s;
302 unsigned int tsize = 0;
303 u16 *tab = NULL;
304 int err;
305
306 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
307 if (err < 0)
308 return ERR_PTR(err);
309 if (!tb[TCA_STAB_BASE])
310 return ERR_PTR(-EINVAL);
311
312 s = nla_data(tb[TCA_STAB_BASE]);
313
314 if (s->tsize > 0) {
315 if (!tb[TCA_STAB_DATA])
316 return ERR_PTR(-EINVAL);
317 tab = nla_data(tb[TCA_STAB_DATA]);
318 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
319 }
320
321 if (!s || tsize != s->tsize || (!tab && tsize > 0))
322 return ERR_PTR(-EINVAL);
323
324 spin_lock(&qdisc_stab_lock);
325
326 list_for_each_entry(stab, &qdisc_stab_list, list) {
327 if (memcmp(&stab->szopts, s, sizeof(*s)))
328 continue;
329 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
330 continue;
331 stab->refcnt++;
332 spin_unlock(&qdisc_stab_lock);
333 return stab;
334 }
335
336 spin_unlock(&qdisc_stab_lock);
337
338 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
339 if (!stab)
340 return ERR_PTR(-ENOMEM);
341
342 stab->refcnt = 1;
343 stab->szopts = *s;
344 if (tsize > 0)
345 memcpy(stab->data, tab, tsize * sizeof(u16));
346
347 spin_lock(&qdisc_stab_lock);
348 list_add_tail(&stab->list, &qdisc_stab_list);
349 spin_unlock(&qdisc_stab_lock);
350
351 return stab;
352}
353
354void qdisc_put_stab(struct qdisc_size_table *tab)
355{
356 if (!tab)
357 return;
358
359 spin_lock(&qdisc_stab_lock);
360
361 if (--tab->refcnt == 0) {
362 list_del(&tab->list);
363 kfree(tab);
364 }
365
366 spin_unlock(&qdisc_stab_lock);
367}
368EXPORT_SYMBOL(qdisc_put_stab);
369
370static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
371{
372 struct nlattr *nest;
373
374 nest = nla_nest_start(skb, TCA_STAB);
375 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
376 nla_nest_end(skb, nest);
377
378 return skb->len;
379
380nla_put_failure:
381 return -1;
382}
383
384void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
385{
386 int pkt_len, slot;
387
388 pkt_len = skb->len + stab->szopts.overhead;
389 if (unlikely(!stab->szopts.tsize))
390 goto out;
391
392 slot = pkt_len + stab->szopts.cell_align;
393 if (unlikely(slot < 0))
394 slot = 0;
395
396 slot >>= stab->szopts.cell_log;
397 if (likely(slot < stab->szopts.tsize))
398 pkt_len = stab->data[slot];
399 else
400 pkt_len = stab->data[stab->szopts.tsize - 1] *
401 (slot / stab->szopts.tsize) +
402 stab->data[slot % stab->szopts.tsize];
403
404 pkt_len <<= stab->szopts.size_log;
405out:
406 if (unlikely(pkt_len < 1))
407 pkt_len = 1;
408 qdisc_skb_cb(skb)->pkt_len = pkt_len;
409}
410EXPORT_SYMBOL(qdisc_calculate_pkt_len);
411
Patrick McHardy41794772007-03-16 01:19:15 -0700412static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
413{
414 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
415 timer);
416
417 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700418 smp_wmb();
David S. Miller37437bb2008-07-16 02:15:04 -0700419 __netif_schedule(wd->qdisc);
Stephen Hemminger19365022007-03-22 12:18:35 -0700420
Patrick McHardy41794772007-03-16 01:19:15 -0700421 return HRTIMER_NORESTART;
422}
423
424void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
425{
426 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
427 wd->timer.function = qdisc_watchdog;
428 wd->qdisc = qdisc;
429}
430EXPORT_SYMBOL(qdisc_watchdog_init);
431
432void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
433{
434 ktime_t time;
435
436 wd->qdisc->flags |= TCQ_F_THROTTLED;
437 time = ktime_set(0, 0);
438 time = ktime_add_ns(time, PSCHED_US2NS(expires));
439 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
440}
441EXPORT_SYMBOL(qdisc_watchdog_schedule);
442
443void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
444{
445 hrtimer_cancel(&wd->timer);
446 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
447}
448EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700450struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
451{
452 unsigned int size = n * sizeof(struct hlist_head), i;
453 struct hlist_head *h;
454
455 if (size <= PAGE_SIZE)
456 h = kmalloc(size, GFP_KERNEL);
457 else
458 h = (struct hlist_head *)
459 __get_free_pages(GFP_KERNEL, get_order(size));
460
461 if (h != NULL) {
462 for (i = 0; i < n; i++)
463 INIT_HLIST_HEAD(&h[i]);
464 }
465 return h;
466}
467
468static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
469{
470 unsigned int size = n * sizeof(struct hlist_head);
471
472 if (size <= PAGE_SIZE)
473 kfree(h);
474 else
475 free_pages((unsigned long)h, get_order(size));
476}
477
478void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
479{
480 struct Qdisc_class_common *cl;
481 struct hlist_node *n, *next;
482 struct hlist_head *nhash, *ohash;
483 unsigned int nsize, nmask, osize;
484 unsigned int i, h;
485
486 /* Rehash when load factor exceeds 0.75 */
487 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
488 return;
489 nsize = clhash->hashsize * 2;
490 nmask = nsize - 1;
491 nhash = qdisc_class_hash_alloc(nsize);
492 if (nhash == NULL)
493 return;
494
495 ohash = clhash->hash;
496 osize = clhash->hashsize;
497
498 sch_tree_lock(sch);
499 for (i = 0; i < osize; i++) {
500 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
501 h = qdisc_class_hash(cl->classid, nmask);
502 hlist_add_head(&cl->hnode, &nhash[h]);
503 }
504 }
505 clhash->hash = nhash;
506 clhash->hashsize = nsize;
507 clhash->hashmask = nmask;
508 sch_tree_unlock(sch);
509
510 qdisc_class_hash_free(ohash, osize);
511}
512EXPORT_SYMBOL(qdisc_class_hash_grow);
513
514int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
515{
516 unsigned int size = 4;
517
518 clhash->hash = qdisc_class_hash_alloc(size);
519 if (clhash->hash == NULL)
520 return -ENOMEM;
521 clhash->hashsize = size;
522 clhash->hashmask = size - 1;
523 clhash->hashelems = 0;
524 return 0;
525}
526EXPORT_SYMBOL(qdisc_class_hash_init);
527
528void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
529{
530 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
531}
532EXPORT_SYMBOL(qdisc_class_hash_destroy);
533
534void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
535 struct Qdisc_class_common *cl)
536{
537 unsigned int h;
538
539 INIT_HLIST_NODE(&cl->hnode);
540 h = qdisc_class_hash(cl->classid, clhash->hashmask);
541 hlist_add_head(&cl->hnode, &clhash->hash[h]);
542 clhash->hashelems++;
543}
544EXPORT_SYMBOL(qdisc_class_hash_insert);
545
546void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
547 struct Qdisc_class_common *cl)
548{
549 hlist_del(&cl->hnode);
550 clhash->hashelems--;
551}
552EXPORT_SYMBOL(qdisc_class_hash_remove);
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554/* Allocate an unique handle from space managed by kernel */
555
556static u32 qdisc_alloc_handle(struct net_device *dev)
557{
558 int i = 0x10000;
559 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
560
561 do {
562 autohandle += TC_H_MAKE(0x10000U, 0);
563 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
564 autohandle = TC_H_MAKE(0x80000000U, 0);
565 } while (qdisc_lookup(dev, autohandle) && --i > 0);
566
567 return i>0 ? autohandle : 0;
568}
569
/*
 * Attach toplevel qdisc to device queue.
 *
 * @dev_queue: queue to attach to.
 * @qdisc:     qdisc to attach; NULL means "none". Its TCQ_F_INGRESS flag
 *             selects the ingress path below.
 *
 * Returns the qdisc that was attached before (dev_queue->qdisc on the
 * ingress path, dev_queue->qdisc_sleeping otherwise). The caller decides
 * what to do with it.
 *
 * NOTE(review): oqdisc is handed to qdisc_root_lock() before the
 * "oqdisc &&" tests further down, so a NULL old qdisc would be
 * dereferenced there if qdisc_root_lock() touches its argument --
 * confirm that the old qdisc can never be NULL on this path.
 */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	spinlock_t *root_lock;
	struct Qdisc *oqdisc;
	int ingress;

	ingress = 0;
	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
		ingress = 1;

	/* Ingress uses ->qdisc directly; egress swaps ->qdisc_sleeping. */
	if (ingress) {
		oqdisc = dev_queue->qdisc;
	} else {
		oqdisc = dev_queue->qdisc_sleeping;
	}

	/* The swap is done under the root lock of the old qdisc. */
	root_lock = qdisc_root_lock(oqdisc);
	spin_lock_bh(root_lock);

	if (ingress) {
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev_queue->qdisc = NULL;
		} else {  /* new */
			dev_queue->qdisc = qdisc;
		}

	} else {
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev_queue->qdisc_sleeping = qdisc;
		/* The active pointer is parked on noop_qdisc here. */
		dev_queue->qdisc = &noop_qdisc;
	}

	spin_unlock_bh(root_lock);

	return oqdisc;
}
618
Patrick McHardy43effa12006-11-29 17:35:48 -0800619void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
620{
Eric Dumazet20fea082007-11-14 01:44:41 -0800621 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800622 unsigned long cl;
623 u32 parentid;
624
625 if (n == 0)
626 return;
627 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700628 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
629 return;
630
David S. Miller5ce2d482008-07-08 17:06:30 -0700631 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700632 if (sch == NULL) {
633 WARN_ON(parentid != TC_H_ROOT);
634 return;
635 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800636 cops = sch->ops->cl_ops;
637 if (cops->qlen_notify) {
638 cl = cops->get(sch, parentid);
639 cops->qlen_notify(sch, cl);
640 cops->put(sch, cl);
641 }
642 sch->q.qlen -= n;
643 }
644}
645EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
David S. Miller99194cf2008-07-17 04:54:10 -0700647static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
648 struct Qdisc *old, struct Qdisc *new)
649{
650 if (new || old)
651 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
David S. Miller99194cf2008-07-17 04:54:10 -0700653 if (old) {
654 spin_lock_bh(&old->q.lock);
655 qdisc_destroy(old);
656 spin_unlock_bh(&old->q.lock);
657 }
658}
659
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * With parent == NULL the graft happens at device level: the device is
 * deactivated while it is up, every affected queue is regrafted through
 * dev_graft_qdisc(), and the device is activated again. Otherwise the
 * work is delegated to the ->graft() class operation of "parent".
 *
 * Returns 0 on success; -EINVAL when "parent" has no class ops or the
 * class cannot be obtained; otherwise whatever ->graft() returned.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		/* Ingress is detected from the qdisc passed in as "old";
		 * it then only involves the single rx queue.
		 */
		ingress = 0;
		num_q = dev->num_tx_queues;
		if (q && q->flags & TCQ_F_INGRESS) {
			num_q = 1;
			ingress = 1;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			if (ingress) {
				old = dev_graft_qdisc(dev_queue, q);
			} else {
				old = dev_graft_qdisc(dev_queue, new);
				/* One extra reference for each tx queue
				 * beyond the first that now points at "new".
				 */
				if (new && i > 0)
					atomic_inc(&new->refcnt);
			}
			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				/* ->graft() reports the replaced qdisc via &old. */
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}
724
725/*
726 Allocate and initialize new qdisc.
727
728 Parameters are passed via opt.
729 */
730
731static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700732qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
733 u32 parent, u32 handle, struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734{
735 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800736 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 struct Qdisc *sch;
738 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700739 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
741 ops = qdisc_lookup_ops(kind);
742#ifdef CONFIG_KMOD
743 if (ops == NULL && kind != NULL) {
744 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800745 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 /* We dropped the RTNL semaphore in order to
747 * perform the module load. So, even if we
748 * succeeded in loading the module we have to
749 * tell the caller to replay the request. We
750 * indicate this using -EAGAIN.
751 * We replay the request because the device may
752 * go away in the mean time.
753 */
754 rtnl_unlock();
755 request_module("sch_%s", name);
756 rtnl_lock();
757 ops = qdisc_lookup_ops(kind);
758 if (ops != NULL) {
759 /* We will try again qdisc_lookup_ops,
760 * so don't keep a reference.
761 */
762 module_put(ops->owner);
763 err = -EAGAIN;
764 goto err_out;
765 }
766 }
767 }
768#endif
769
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700770 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 if (ops == NULL)
772 goto err_out;
773
David S. Miller5ce2d482008-07-08 17:06:30 -0700774 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700775 if (IS_ERR(sch)) {
776 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700778 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700780 sch->parent = parent;
781
Thomas Graf3d54b822005-07-05 14:15:09 -0700782 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700784 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700785 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700786 if (handle == 0) {
787 handle = qdisc_alloc_handle(dev);
788 err = -ENOMEM;
789 if (handle == 0)
790 goto err_out3;
791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 }
793
Thomas Graf3d54b822005-07-05 14:15:09 -0700794 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
Patrick McHardy1e904742008-01-22 22:11:17 -0800796 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700797 if (tca[TCA_STAB]) {
798 stab = qdisc_get_stab(tca[TCA_STAB]);
799 if (IS_ERR(stab)) {
800 err = PTR_ERR(stab);
801 goto err_out3;
802 }
803 sch->stab = stab;
804 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800805 if (tca[TCA_RATE]) {
Thomas Graf023e09a2005-07-05 14:15:53 -0700806 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700807 qdisc_root_lock(sch),
Patrick McHardy1e904742008-01-22 22:11:17 -0800808 tca[TCA_RATE]);
Thomas Graf023e09a2005-07-05 14:15:53 -0700809 if (err) {
810 /*
811 * Any broken qdiscs that would require
812 * a ops->reset() here? The qdisc was never
813 * in action so it shouldn't be necessary.
814 */
815 if (ops->destroy)
816 ops->destroy(sch);
817 goto err_out3;
818 }
819 }
David S. Miller30723672008-07-18 22:50:15 -0700820 if (parent)
821 list_add_tail(&sch->list, &dev_queue->qdisc->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 return sch;
824 }
825err_out3:
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700826 qdisc_put_stab(sch->stab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700828 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829err_out2:
830 module_put(ops->owner);
831err_out:
832 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 return NULL;
834}
835
Patrick McHardy1e904742008-01-22 22:11:17 -0800836static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700838 struct qdisc_size_table *stab = NULL;
839 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700841 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 if (sch->ops->change == NULL)
843 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800844 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 if (err)
846 return err;
847 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700848
849 if (tca[TCA_STAB]) {
850 stab = qdisc_get_stab(tca[TCA_STAB]);
851 if (IS_ERR(stab))
852 return PTR_ERR(stab);
853 }
854
855 qdisc_put_stab(sch->stab);
856 sch->stab = stab;
857
Patrick McHardy1e904742008-01-22 22:11:17 -0800858 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 gen_replace_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700860 qdisc_root_lock(sch), tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 return 0;
862}
863
/*
 * Walker state for check_loop(). The qdisc_walker must stay the first
 * member: check_loop_fn() casts the walker pointer it receives back to
 * this structure.
 */
struct check_loop_arg
{
	struct qdisc_walker 	w;	/* generic class walker state */
	struct Qdisc		*p;	/* qdisc that must not be reachable */
	int			depth;	/* current nesting depth of the walk */
};
870
871static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
872
873static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
874{
875 struct check_loop_arg arg;
876
877 if (q->ops->cl_ops == NULL)
878 return 0;
879
880 arg.w.stop = arg.w.skip = arg.w.count = 0;
881 arg.w.fn = check_loop_fn;
882 arg.depth = depth;
883 arg.p = p;
884 q->ops->cl_ops->walk(q, &arg.w);
885 return arg.w.stop ? -ELOOP : 0;
886}
887
888static int
889check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
890{
891 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800892 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 struct check_loop_arg *arg = (struct check_loop_arg *)w;
894
895 leaf = cops->leaf(q, cl);
896 if (leaf) {
897 if (leaf == arg->p || arg->depth > 7)
898 return -ELOOP;
899 return check_loop(leaf, arg->p, arg->depth + 1);
900 }
901 return 0;
902}
903
904/*
905 * Delete/get qdisc.
906 */
907
908static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
909{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900910 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800912 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 struct net_device *dev;
914 u32 clid = tcm->tcm_parent;
915 struct Qdisc *q = NULL;
916 struct Qdisc *p = NULL;
917 int err;
918
Denis V. Lunevb8542722007-12-01 00:21:31 +1100919 if (net != &init_net)
920 return -EINVAL;
921
Eric W. Biederman881d9662007-09-17 11:56:21 -0700922 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 return -ENODEV;
924
Patrick McHardy1e904742008-01-22 22:11:17 -0800925 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
926 if (err < 0)
927 return err;
928
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 if (clid) {
930 if (clid != TC_H_ROOT) {
931 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
932 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
933 return -ENOENT;
934 q = qdisc_leaf(p, clid);
935 } else { /* ingress */
David S. Miller816f3252008-07-08 22:49:00 -0700936 q = dev->rx_queue.qdisc;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900937 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700939 struct netdev_queue *dev_queue;
940 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700941 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 }
943 if (!q)
944 return -ENOENT;
945
946 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
947 return -EINVAL;
948 } else {
949 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
950 return -ENOENT;
951 }
952
Patrick McHardy1e904742008-01-22 22:11:17 -0800953 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 return -EINVAL;
955
956 if (n->nlmsg_type == RTM_DELQDISC) {
957 if (!clid)
958 return -EINVAL;
959 if (q->handle == 0)
960 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -0700961 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 } else {
964 qdisc_notify(skb, n, clid, NULL, q);
965 }
966 return 0;
967}
968
969/*
970 Create/change qdisc.
971 */
972
973static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
974{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900975 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -0800977 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 struct net_device *dev;
979 u32 clid;
980 struct Qdisc *q, *p;
981 int err;
982
Denis V. Lunevb8542722007-12-01 00:21:31 +1100983 if (net != &init_net)
984 return -EINVAL;
985
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986replay:
987 /* Reinit, just in case something touches this. */
988 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 clid = tcm->tcm_parent;
990 q = p = NULL;
991
Eric W. Biederman881d9662007-09-17 11:56:21 -0700992 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 return -ENODEV;
994
Patrick McHardy1e904742008-01-22 22:11:17 -0800995 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
996 if (err < 0)
997 return err;
998
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 if (clid) {
1000 if (clid != TC_H_ROOT) {
1001 if (clid != TC_H_INGRESS) {
1002 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1003 return -ENOENT;
1004 q = qdisc_leaf(p, clid);
1005 } else { /*ingress */
David S. Miller816f3252008-07-08 22:49:00 -07001006 q = dev->rx_queue.qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 }
1008 } else {
David S. Millere8a04642008-07-17 00:34:19 -07001009 struct netdev_queue *dev_queue;
1010 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -07001011 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 }
1013
1014 /* It may be default qdisc, ignore it */
1015 if (q && q->handle == 0)
1016 q = NULL;
1017
1018 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1019 if (tcm->tcm_handle) {
1020 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1021 return -EEXIST;
1022 if (TC_H_MIN(tcm->tcm_handle))
1023 return -EINVAL;
1024 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1025 goto create_n_graft;
1026 if (n->nlmsg_flags&NLM_F_EXCL)
1027 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001028 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 return -EINVAL;
1030 if (q == p ||
1031 (p && check_loop(q, p, 0)))
1032 return -ELOOP;
1033 atomic_inc(&q->refcnt);
1034 goto graft;
1035 } else {
1036 if (q == NULL)
1037 goto create_n_graft;
1038
1039 /* This magic test requires explanation.
1040 *
1041 * We know, that some child q is already
1042 * attached to this parent and have choice:
1043 * either to change it or to create/graft new one.
1044 *
1045 * 1. We are allowed to create/graft only
1046 * if CREATE and REPLACE flags are set.
1047 *
1048 * 2. If EXCL is set, requestor wanted to say,
1049 * that qdisc tcm_handle is not expected
1050 * to exist, so that we choose create/graft too.
1051 *
1052 * 3. The last case is when no flags are set.
1053 * Alas, it is sort of hole in API, we
1054 * cannot decide what to do unambiguously.
1055 * For now we select create/graft, if
1056 * user gave KIND, which does not match existing.
1057 */
1058 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1059 (n->nlmsg_flags&NLM_F_REPLACE) &&
1060 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001061 (tca[TCA_KIND] &&
1062 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 goto create_n_graft;
1064 }
1065 }
1066 } else {
1067 if (!tcm->tcm_handle)
1068 return -EINVAL;
1069 q = qdisc_lookup(dev, tcm->tcm_handle);
1070 }
1071
1072 /* Change qdisc parameters */
1073 if (q == NULL)
1074 return -ENOENT;
1075 if (n->nlmsg_flags&NLM_F_EXCL)
1076 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001077 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 return -EINVAL;
1079 err = qdisc_change(q, tca);
1080 if (err == 0)
1081 qdisc_notify(skb, n, clid, NULL, q);
1082 return err;
1083
1084create_n_graft:
1085 if (!(n->nlmsg_flags&NLM_F_CREATE))
1086 return -ENOENT;
1087 if (clid == TC_H_INGRESS)
David S. Millerbb949fb2008-07-08 16:55:56 -07001088 q = qdisc_create(dev, &dev->rx_queue,
1089 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001090 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001091 else
David S. Millere8a04642008-07-17 00:34:19 -07001092 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
David S. Millerbb949fb2008-07-08 16:55:56 -07001093 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001094 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 if (q == NULL) {
1096 if (err == -EAGAIN)
1097 goto replay;
1098 return err;
1099 }
1100
1101graft:
1102 if (1) {
David S. Miller53049972008-07-16 03:00:19 -07001103 spinlock_t *root_lock;
1104
David S. Miller99194cf2008-07-17 04:54:10 -07001105 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 if (err) {
1107 if (q) {
David S. Miller53049972008-07-16 03:00:19 -07001108 root_lock = qdisc_root_lock(q);
1109 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 qdisc_destroy(q);
David S. Miller53049972008-07-16 03:00:19 -07001111 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 }
1113 return err;
1114 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 }
1116 return 0;
1117}
1118
1119static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001120 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121{
1122 struct tcmsg *tcm;
1123 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001124 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 struct gnet_dump d;
1126
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001127 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 tcm = NLMSG_DATA(nlh);
1129 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001130 tcm->tcm__pad1 = 0;
1131 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001132 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 tcm->tcm_parent = clid;
1134 tcm->tcm_handle = q->handle;
1135 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001136 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001138 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 q->qstats.qlen = q->q.qlen;
1140
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001141 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1142 goto nla_put_failure;
1143
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001145 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001146 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147
1148 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001149 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150
1151 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001154 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001155
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001157 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001158
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001159 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 return skb->len;
1161
1162nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001163nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001164 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 return -1;
1166}
1167
1168static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1169 u32 clid, struct Qdisc *old, struct Qdisc *new)
1170{
1171 struct sk_buff *skb;
1172 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1173
1174 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1175 if (!skb)
1176 return -ENOBUFS;
1177
1178 if (old && old->handle) {
1179 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1180 goto err_out;
1181 }
1182 if (new) {
1183 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1184 goto err_out;
1185 }
1186
1187 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001188 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
1190err_out:
1191 kfree_skb(skb);
1192 return -EINVAL;
1193}
1194
David S. Miller30723672008-07-18 22:50:15 -07001195static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1196{
1197 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1198}
1199
1200static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1201 struct netlink_callback *cb,
1202 int *q_idx_p, int s_q_idx)
1203{
1204 int ret = 0, q_idx = *q_idx_p;
1205 struct Qdisc *q;
1206
1207 if (!root)
1208 return 0;
1209
1210 q = root;
1211 if (q_idx < s_q_idx) {
1212 q_idx++;
1213 } else {
1214 if (!tc_qdisc_dump_ignore(q) &&
1215 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1216 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1217 goto done;
1218 q_idx++;
1219 }
1220 list_for_each_entry(q, &root->list, list) {
1221 if (q_idx < s_q_idx) {
1222 q_idx++;
1223 continue;
1224 }
1225 if (!tc_qdisc_dump_ignore(q) &&
1226 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1227 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1228 goto done;
1229 q_idx++;
1230 }
1231
1232out:
1233 *q_idx_p = q_idx;
1234 return ret;
1235done:
1236 ret = -1;
1237 goto out;
1238}
1239
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1241{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001242 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 int idx, q_idx;
1244 int s_idx, s_q_idx;
1245 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
Denis V. Lunevb8542722007-12-01 00:21:31 +11001247 if (net != &init_net)
1248 return 0;
1249
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 s_idx = cb->args[0];
1251 s_q_idx = q_idx = cb->args[1];
1252 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07001253 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001254 for_each_netdev(&init_net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001255 struct netdev_queue *dev_queue;
1256
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001258 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 if (idx > s_idx)
1260 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001262
1263 dev_queue = netdev_get_tx_queue(dev, 0);
1264 if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1265 goto done;
1266
1267 dev_queue = &dev->rx_queue;
1268 if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1269 goto done;
1270
Pavel Emelianov7562f872007-05-03 15:13:45 -07001271cont:
1272 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 }
1274
1275done:
1276 read_unlock(&dev_base_lock);
1277
1278 cb->args[0] = idx;
1279 cb->args[1] = q_idx;
1280
1281 return skb->len;
1282}
1283
1284
1285
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
1289
1290
1291
1292static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1293{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001294 struct net *net = sock_net(skb->sk);
David S. Millerb0e1e642008-07-08 17:42:10 -07001295 struct netdev_queue *dev_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001297 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 struct net_device *dev;
1299 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001300 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 unsigned long cl = 0;
1302 unsigned long new_cl;
1303 u32 pid = tcm->tcm_parent;
1304 u32 clid = tcm->tcm_handle;
1305 u32 qid = TC_H_MAJ(clid);
1306 int err;
1307
Denis V. Lunevb8542722007-12-01 00:21:31 +11001308 if (net != &init_net)
1309 return -EINVAL;
1310
Eric W. Biederman881d9662007-09-17 11:56:21 -07001311 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 return -ENODEV;
1313
Patrick McHardy1e904742008-01-22 22:11:17 -08001314 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1315 if (err < 0)
1316 return err;
1317
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 /*
1319 parent == TC_H_UNSPEC - unspecified parent.
1320 parent == TC_H_ROOT - class is root, which has no parent.
1321 parent == X:0 - parent is root class.
1322 parent == X:Y - parent is a node in hierarchy.
1323 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1324
1325 handle == 0:0 - generate handle from kernel pool.
1326 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1327 handle == X:Y - clear.
1328 handle == X:0 - root class.
1329 */
1330
1331 /* Step 1. Determine qdisc handle X:0 */
1332
David S. Millere8a04642008-07-17 00:34:19 -07001333 dev_queue = netdev_get_tx_queue(dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 if (pid != TC_H_ROOT) {
1335 u32 qid1 = TC_H_MAJ(pid);
1336
1337 if (qid && qid1) {
1338 /* If both majors are known, they must be identical. */
1339 if (qid != qid1)
1340 return -EINVAL;
1341 } else if (qid1) {
1342 qid = qid1;
1343 } else if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001344 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345
1346 /* Now qid is genuine qdisc handle consistent
1347 both with parent and child.
1348
1349 TC_H_MAJ(pid) still may be unspecified, complete it now.
1350 */
1351 if (pid)
1352 pid = TC_H_MAKE(qid, pid);
1353 } else {
1354 if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001355 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 }
1357
1358 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001359 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 return -ENOENT;
1361
1362 /* An check that it supports classes */
1363 cops = q->ops->cl_ops;
1364 if (cops == NULL)
1365 return -EINVAL;
1366
1367 /* Now try to get class */
1368 if (clid == 0) {
1369 if (pid == TC_H_ROOT)
1370 clid = qid;
1371 } else
1372 clid = TC_H_MAKE(qid, clid);
1373
1374 if (clid)
1375 cl = cops->get(q, clid);
1376
1377 if (cl == 0) {
1378 err = -ENOENT;
1379 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1380 goto out;
1381 } else {
1382 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001383 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 err = -EEXIST;
1385 if (n->nlmsg_flags&NLM_F_EXCL)
1386 goto out;
1387 break;
1388 case RTM_DELTCLASS:
1389 err = cops->delete(q, cl);
1390 if (err == 0)
1391 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1392 goto out;
1393 case RTM_GETTCLASS:
1394 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1395 goto out;
1396 default:
1397 err = -EINVAL;
1398 goto out;
1399 }
1400 }
1401
1402 new_cl = cl;
1403 err = cops->change(q, clid, pid, tca, &new_cl);
1404 if (err == 0)
1405 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1406
1407out:
1408 if (cl)
1409 cops->put(q, cl);
1410
1411 return err;
1412}
1413
1414
1415static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1416 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001417 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418{
1419 struct tcmsg *tcm;
1420 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001421 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001423 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001425 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 tcm = NLMSG_DATA(nlh);
1427 tcm->tcm_family = AF_UNSPEC;
David S. Miller5ce2d482008-07-08 17:06:30 -07001428 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429 tcm->tcm_parent = q->handle;
1430 tcm->tcm_handle = q->handle;
1431 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001432 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001434 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
1436 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001437 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001438 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
1440 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001441 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442
1443 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001444 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001446 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 return skb->len;
1448
1449nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001450nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001451 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 return -1;
1453}
1454
1455static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1456 struct Qdisc *q, unsigned long cl, int event)
1457{
1458 struct sk_buff *skb;
1459 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1460
1461 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1462 if (!skb)
1463 return -ENOBUFS;
1464
1465 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1466 kfree_skb(skb);
1467 return -EINVAL;
1468 }
1469
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001470 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471}
1472
1473struct qdisc_dump_args
1474{
1475 struct qdisc_walker w;
1476 struct sk_buff *skb;
1477 struct netlink_callback *cb;
1478};
1479
1480static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1481{
1482 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1483
1484 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1485 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1486}
1487
David S. Miller30723672008-07-18 22:50:15 -07001488static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1489 struct tcmsg *tcm, struct netlink_callback *cb,
1490 int *t_p, int s_t)
1491{
1492 struct qdisc_dump_args arg;
1493
1494 if (tc_qdisc_dump_ignore(q) ||
1495 *t_p < s_t || !q->ops->cl_ops ||
1496 (tcm->tcm_parent &&
1497 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1498 (*t_p)++;
1499 return 0;
1500 }
1501 if (*t_p > s_t)
1502 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1503 arg.w.fn = qdisc_class_dump;
1504 arg.skb = skb;
1505 arg.cb = cb;
1506 arg.w.stop = 0;
1507 arg.w.skip = cb->args[1];
1508 arg.w.count = 0;
1509 q->ops->cl_ops->walk(q, &arg.w);
1510 cb->args[1] = arg.w.count;
1511 if (arg.w.stop)
1512 return -1;
1513 (*t_p)++;
1514 return 0;
1515}
1516
1517static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1518 struct tcmsg *tcm, struct netlink_callback *cb,
1519 int *t_p, int s_t)
1520{
1521 struct Qdisc *q;
1522
1523 if (!root)
1524 return 0;
1525
1526 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1527 return -1;
1528
1529 list_for_each_entry(q, &root->list, list) {
1530 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1531 return -1;
1532 }
1533
1534 return 0;
1535}
1536
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1538{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001540 struct net *net = sock_net(skb->sk);
1541 struct netdev_queue *dev_queue;
1542 struct net_device *dev;
1543 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544
Denis V. Lunevb8542722007-12-01 00:21:31 +11001545 if (net != &init_net)
1546 return 0;
1547
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1549 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001550 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 return 0;
1552
1553 s_t = cb->args[0];
1554 t = 0;
1555
David S. Miller30723672008-07-18 22:50:15 -07001556 dev_queue = netdev_get_tx_queue(dev, 0);
1557 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1558 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559
David S. Miller30723672008-07-18 22:50:15 -07001560 dev_queue = &dev->rx_queue;
1561 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1562 goto done;
1563
1564done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 cb->args[0] = t;
1566
1567 dev_put(dev);
1568 return skb->len;
1569}
1570
1571/* Main classifier routine: scans classifier chain attached
1572 to this qdisc, (optionally) tests for protocol and asks
1573 specific classifiers.
1574 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001575int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1576 struct tcf_result *res)
1577{
1578 __be16 protocol = skb->protocol;
1579 int err = 0;
1580
1581 for (; tp; tp = tp->next) {
1582 if ((tp->protocol == protocol ||
1583 tp->protocol == htons(ETH_P_ALL)) &&
1584 (err = tp->classify(skb, tp, res)) >= 0) {
1585#ifdef CONFIG_NET_CLS_ACT
1586 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1587 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1588#endif
1589 return err;
1590 }
1591 }
1592 return -1;
1593}
1594EXPORT_SYMBOL(tc_classify_compat);
1595
/*
 * Classify @skb against the chain starting at @tp.
 *
 * Wraps tc_classify_compat().  With CONFIG_NET_CLS_ACT, a
 * TC_ACT_RECLASSIFY result restarts classification from the head of the
 * chain; the number of restarts is tracked in skb->tc_verd and once it
 * reaches MAX_REC_LOOP the packet is reported and TC_ACT_SHOT is returned.
 *
 * Returns the classification result (see tc_classify_compat()).
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* chain head, for restarts */
reclassify:
#endif
	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/* Explicit log level instead of the printk default. */
			printk(KERN_WARNING
			       "rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
Patrick McHardya48b5a62007-03-23 11:29:43 -07001627void tcf_destroy(struct tcf_proto *tp)
1628{
1629 tp->ops->destroy(tp);
1630 module_put(tp->ops->owner);
1631 kfree(tp);
1632}
1633
Patrick McHardyff31ab52008-07-01 19:52:38 -07001634void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001635{
1636 struct tcf_proto *tp;
1637
Patrick McHardyff31ab52008-07-01 19:52:38 -07001638 while ((tp = *fl) != NULL) {
1639 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001640 tcf_destroy(tp);
1641 }
1642}
1643EXPORT_SYMBOL(tcf_destroy_chain);
1644
#ifdef CONFIG_PROC_FS
/*
 * seq_file show routine behind the "psched" proc entry: prints four
 * 32-bit hex words - NSEC_PER_USEC, PSCHED_US2NS(1), the constant
 * 1000000, and NSEC_PER_SEC divided by the CLOCK_MONOTONIC hrtimer
 * resolution in nanoseconds.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;
	u32 res_ns;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	res_ns = (u32)ktime_to_ns(timespec_to_ktime(ts));

	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC,
		   (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / res_ns);

	return 0;
}

/* open() for the proc entry: single-shot seq_file around psched_show(). */
static int psched_open(struct inode *inode, struct file *file)
{
	void *data = PDE(inode)->data;

	return single_open(file, psched_show, data);
}

/* File operations of the proc entry (plain single_open seq_file). */
static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
1672
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673static int __init pktsched_init(void)
1674{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 register_qdisc(&pfifo_qdisc_ops);
1676 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001677 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678
Thomas Grafbe577dd2007-03-22 11:55:50 -07001679 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1680 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1681 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1682 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1683 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1684 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1685
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 return 0;
1687}
1688
1689subsys_initcall(pktsched_init);