/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020031#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110032#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070033#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <net/pkt_sched.h>
35
Linus Torvalds1da177e2005-04-16 15:20:36 -070036static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. the queueing discipline manager frontend.
   2. the traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   which is able to enqueue packets and to dequeue them (when the
   device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes"
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform the
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the really intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP - this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED - dropped by a policer.
     Expected action: back off or report an error to real-time applications.

   Auxiliary routines:

   ---requeue

   requeues a packet that was already dequeued once. It is used for
   non-standard or just buggy devices, which can defer output even when
   netif_queue_stopped() == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers and clears
   all timers and counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes the qdisc's parameters.
 */
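
/*
 * Illustrative sketch of a minimal "queue"-type discipline obeying the
 * enqueue/dequeue contract described above.  The names example_enqueue and
 * example_dequeue, and the use of tx_queue_len as the limit, are hypothetical;
 * real implementations live in the sch_*.c modules.
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 *	{
 *		if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) {
 *			__skb_queue_tail(&sch->q, skb);
 *			sch->qstats.backlog += qdisc_pkt_len(skb);
 *			sch->bstats.bytes += qdisc_pkt_len(skb);
 *			sch->bstats.packets++;
 *			return NET_XMIT_SUCCESS;
 *		}
 *		sch->qstats.drops++;
 *		kfree_skb(skb);
 *		return NET_XMIT_DROP;
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		return __skb_dequeue(&sch->q);
 *	}
 */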
121
/* Protects the list of registered TC modules. It is a pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800161EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800180EXPORT_SYMBOL(unregister_qdisc);
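
/*
 * Typical usage from a scheduler module (sketch only; example_qdisc_ops and
 * the example_* callbacks are hypothetical, but the pattern mirrors how the
 * in-tree sch_*.c modules call register_qdisc()/unregister_qdisc()):
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.priv_size	= 0,
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *	module_init(example_module_init);
 *	module_exit(example_module_exit);
 */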
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */
185
David S. Milleread81cc2008-07-17 00:50:32 -0700186struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800187{
David S. Miller30723672008-07-18 22:50:15 -0700188 unsigned int i;
Patrick McHardy43effa12006-11-29 17:35:48 -0800189
David S. Miller30723672008-07-18 22:50:15 -0700190 for (i = 0; i < dev->num_tx_queues; i++) {
191 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
David S. Miller827ebd62008-08-07 20:26:40 -0700192 struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
David S. Miller30723672008-07-18 22:50:15 -0700193
194 if (!(txq_root->flags & TCQ_F_BUILTIN) &&
195 txq_root->handle == handle)
196 return txq_root;
197
198 list_for_each_entry(q, &txq_root->list, list) {
199 if (q->handle == handle)
200 return q;
201 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800202 }
203 return NULL;
204}
205
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
207{
208 unsigned long cl;
209 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800210 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
212 if (cops == NULL)
213 return NULL;
214 cl = cops->get(p, classid);
215
216 if (cl == 0)
217 return NULL;
218 leaf = cops->leaf(p, cl);
219 cops->put(p, cl);
220 return leaf;
221}
222
223/* Find queueing discipline by name */
224
Patrick McHardy1e904742008-01-22 22:11:17 -0800225static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226{
227 struct Qdisc_ops *q = NULL;
228
229 if (kind) {
230 read_lock(&qdisc_mod_lock);
231 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800232 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 if (!try_module_get(q->owner))
234 q = NULL;
235 break;
236 }
237 }
238 read_unlock(&qdisc_mod_lock);
239 }
240 return q;
241}
242
243static struct qdisc_rate_table *qdisc_rtab_list;
244
Patrick McHardy1e904742008-01-22 22:11:17 -0800245struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246{
247 struct qdisc_rate_table *rtab;
248
249 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
250 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
251 rtab->refcnt++;
252 return rtab;
253 }
254 }
255
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800256 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
257 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 return NULL;
259
260 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
261 if (rtab) {
262 rtab->rate = *r;
263 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800264 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 rtab->next = qdisc_rtab_list;
266 qdisc_rtab_list = rtab;
267 }
268 return rtab;
269}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800270EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
272void qdisc_put_rtab(struct qdisc_rate_table *tab)
273{
274 struct qdisc_rate_table *rtab, **rtabp;
275
276 if (!tab || --tab->refcnt)
277 return;
278
279 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
280 if (rtab == tab) {
281 *rtabp = rtab->next;
282 kfree(rtab);
283 return;
284 }
285 }
286}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800287EXPORT_SYMBOL(qdisc_put_rtab);
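
/*
 * Typical rate table usage by a shaping qdisc (sketch; the TCA_EXAMPLE_*
 * attribute names and the tb[] parse results are hypothetical):
 *
 *	in ->init()/->change():
 *		struct tc_ratespec *r = nla_data(tb[TCA_EXAMPLE_RATE]);
 *		struct qdisc_rate_table *rtab;
 *
 *		rtab = qdisc_get_rtab(r, tb[TCA_EXAMPLE_RTAB]);
 *		if (rtab == NULL)
 *			return -EINVAL;
 *
 *	in ->destroy():
 *		qdisc_put_rtab(rtab);
 *
 * The 1024 bytes copied above hold 256 u32 transmission times indexed by
 * packet length >> cell_log; qdiscs read them via the qdisc_l2t() helper.
 */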
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700289static LIST_HEAD(qdisc_stab_list);
290static DEFINE_SPINLOCK(qdisc_stab_lock);
291
292static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
293 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
294 [TCA_STAB_DATA] = { .type = NLA_BINARY },
295};
296
297static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
298{
299 struct nlattr *tb[TCA_STAB_MAX + 1];
300 struct qdisc_size_table *stab;
301 struct tc_sizespec *s;
302 unsigned int tsize = 0;
303 u16 *tab = NULL;
304 int err;
305
306 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
307 if (err < 0)
308 return ERR_PTR(err);
309 if (!tb[TCA_STAB_BASE])
310 return ERR_PTR(-EINVAL);
311
312 s = nla_data(tb[TCA_STAB_BASE]);
313
314 if (s->tsize > 0) {
315 if (!tb[TCA_STAB_DATA])
316 return ERR_PTR(-EINVAL);
317 tab = nla_data(tb[TCA_STAB_DATA]);
318 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
319 }
320
321 if (!s || tsize != s->tsize || (!tab && tsize > 0))
322 return ERR_PTR(-EINVAL);
323
324 spin_lock(&qdisc_stab_lock);
325
326 list_for_each_entry(stab, &qdisc_stab_list, list) {
327 if (memcmp(&stab->szopts, s, sizeof(*s)))
328 continue;
329 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
330 continue;
331 stab->refcnt++;
332 spin_unlock(&qdisc_stab_lock);
333 return stab;
334 }
335
336 spin_unlock(&qdisc_stab_lock);
337
338 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
339 if (!stab)
340 return ERR_PTR(-ENOMEM);
341
342 stab->refcnt = 1;
343 stab->szopts = *s;
344 if (tsize > 0)
345 memcpy(stab->data, tab, tsize * sizeof(u16));
346
347 spin_lock(&qdisc_stab_lock);
348 list_add_tail(&stab->list, &qdisc_stab_list);
349 spin_unlock(&qdisc_stab_lock);
350
351 return stab;
352}
353
354void qdisc_put_stab(struct qdisc_size_table *tab)
355{
356 if (!tab)
357 return;
358
359 spin_lock(&qdisc_stab_lock);
360
361 if (--tab->refcnt == 0) {
362 list_del(&tab->list);
363 kfree(tab);
364 }
365
366 spin_unlock(&qdisc_stab_lock);
367}
368EXPORT_SYMBOL(qdisc_put_stab);
369
370static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
371{
372 struct nlattr *nest;
373
374 nest = nla_nest_start(skb, TCA_STAB);
375 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
376 nla_nest_end(skb, nest);
377
378 return skb->len;
379
380nla_put_failure:
381 return -1;
382}
383
384void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
385{
386 int pkt_len, slot;
387
388 pkt_len = skb->len + stab->szopts.overhead;
389 if (unlikely(!stab->szopts.tsize))
390 goto out;
391
392 slot = pkt_len + stab->szopts.cell_align;
393 if (unlikely(slot < 0))
394 slot = 0;
395
396 slot >>= stab->szopts.cell_log;
397 if (likely(slot < stab->szopts.tsize))
398 pkt_len = stab->data[slot];
399 else
400 pkt_len = stab->data[stab->szopts.tsize - 1] *
401 (slot / stab->szopts.tsize) +
402 stab->data[slot % stab->szopts.tsize];
403
404 pkt_len <<= stab->szopts.size_log;
405out:
406 if (unlikely(pkt_len < 1))
407 pkt_len = 1;
408 qdisc_skb_cb(skb)->pkt_len = pkt_len;
409}
410EXPORT_SYMBOL(qdisc_calculate_pkt_len);
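
/*
 * Worked example with hypothetical size-table parameters: if
 * szopts.overhead == 0, cell_align == 0, cell_log == 3 and size_log == 0,
 * a 100-byte skb gives slot = 100 >> 3 = 12, so qdisc_skb_cb(skb)->pkt_len
 * becomes stab->data[12]; slots beyond tsize are extrapolated from the
 * last table entry as in the code above.
 */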
411
Patrick McHardy41794772007-03-16 01:19:15 -0700412static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
413{
414 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
415 timer);
416
417 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700418 smp_wmb();
David S. Miller37437bb2008-07-16 02:15:04 -0700419 __netif_schedule(wd->qdisc);
Stephen Hemminger19365022007-03-22 12:18:35 -0700420
Patrick McHardy41794772007-03-16 01:19:15 -0700421 return HRTIMER_NORESTART;
422}
423
424void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
425{
426 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
427 wd->timer.function = qdisc_watchdog;
428 wd->qdisc = qdisc;
429}
430EXPORT_SYMBOL(qdisc_watchdog_init);
431
432void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
433{
434 ktime_t time;
435
436 wd->qdisc->flags |= TCQ_F_THROTTLED;
437 time = ktime_set(0, 0);
438 time = ktime_add_ns(time, PSCHED_US2NS(expires));
439 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
440}
441EXPORT_SYMBOL(qdisc_watchdog_schedule);
442
443void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
444{
445 hrtimer_cancel(&wd->timer);
446 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
447}
448EXPORT_SYMBOL(qdisc_watchdog_cancel);
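
/*
 * How a rate-limiting qdisc is expected to use the watchdog (sketch;
 * q->watchdog and next_send_time are hypothetical names for what e.g.
 * TBF or netem keep in their private data):
 *
 *	in ->init():		qdisc_watchdog_init(&q->watchdog, sch);
 *	in ->dequeue(), when the head packet may not be sent yet:
 *		qdisc_watchdog_schedule(&q->watchdog, next_send_time);
 *		return NULL;
 *	in ->reset()/->destroy():
 *		qdisc_watchdog_cancel(&q->watchdog);
 *
 * When the hrtimer fires, qdisc_watchdog() clears TCQ_F_THROTTLED and
 * reschedules the device queue so that ->dequeue() is tried again.
 */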
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
Adrian Bunka94f7792008-07-22 14:20:11 -0700450static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700451{
452 unsigned int size = n * sizeof(struct hlist_head), i;
453 struct hlist_head *h;
454
455 if (size <= PAGE_SIZE)
456 h = kmalloc(size, GFP_KERNEL);
457 else
458 h = (struct hlist_head *)
459 __get_free_pages(GFP_KERNEL, get_order(size));
460
461 if (h != NULL) {
462 for (i = 0; i < n; i++)
463 INIT_HLIST_HEAD(&h[i]);
464 }
465 return h;
466}
467
468static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
469{
470 unsigned int size = n * sizeof(struct hlist_head);
471
472 if (size <= PAGE_SIZE)
473 kfree(h);
474 else
475 free_pages((unsigned long)h, get_order(size));
476}
477
478void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
479{
480 struct Qdisc_class_common *cl;
481 struct hlist_node *n, *next;
482 struct hlist_head *nhash, *ohash;
483 unsigned int nsize, nmask, osize;
484 unsigned int i, h;
485
486 /* Rehash when load factor exceeds 0.75 */
487 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
488 return;
489 nsize = clhash->hashsize * 2;
490 nmask = nsize - 1;
491 nhash = qdisc_class_hash_alloc(nsize);
492 if (nhash == NULL)
493 return;
494
495 ohash = clhash->hash;
496 osize = clhash->hashsize;
497
498 sch_tree_lock(sch);
499 for (i = 0; i < osize; i++) {
500 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
501 h = qdisc_class_hash(cl->classid, nmask);
502 hlist_add_head(&cl->hnode, &nhash[h]);
503 }
504 }
505 clhash->hash = nhash;
506 clhash->hashsize = nsize;
507 clhash->hashmask = nmask;
508 sch_tree_unlock(sch);
509
510 qdisc_class_hash_free(ohash, osize);
511}
512EXPORT_SYMBOL(qdisc_class_hash_grow);
513
514int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
515{
516 unsigned int size = 4;
517
518 clhash->hash = qdisc_class_hash_alloc(size);
519 if (clhash->hash == NULL)
520 return -ENOMEM;
521 clhash->hashsize = size;
522 clhash->hashmask = size - 1;
523 clhash->hashelems = 0;
524 return 0;
525}
526EXPORT_SYMBOL(qdisc_class_hash_init);
527
528void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
529{
530 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
531}
532EXPORT_SYMBOL(qdisc_class_hash_destroy);
533
534void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
535 struct Qdisc_class_common *cl)
536{
537 unsigned int h;
538
539 INIT_HLIST_NODE(&cl->hnode);
540 h = qdisc_class_hash(cl->classid, clhash->hashmask);
541 hlist_add_head(&cl->hnode, &clhash->hash[h]);
542 clhash->hashelems++;
543}
544EXPORT_SYMBOL(qdisc_class_hash_insert);
545
546void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
547 struct Qdisc_class_common *cl)
548{
549 hlist_del(&cl->hnode);
550 clhash->hashelems--;
551}
552EXPORT_SYMBOL(qdisc_class_hash_remove);
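
/*
 * Intended usage by a classful qdisc (sketch; struct example_class and
 * q->clhash are hypothetical, mirroring how e.g. HTB uses these helpers):
 *
 *	struct example_class {
 *		struct Qdisc_class_common common;
 *		... scheduler-specific state ...
 *	};
 *
 *	in ->init():		qdisc_class_hash_init(&q->clhash);
 *	when creating a class:
 *		cl->common.classid = classid;
 *		qdisc_class_hash_insert(&q->clhash, &cl->common);
 *		qdisc_class_hash_grow(sch, &q->clhash);
 *	when deleting a class:
 *		qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	in ->destroy():		qdisc_class_hash_destroy(&q->clhash);
 *
 * Lookup hashes the classid and walks the matching bucket, typically via
 * the qdisc_class_find() helper.
 */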
553
/* Allocate a unique handle from the space managed by the kernel */
555
556static u32 qdisc_alloc_handle(struct net_device *dev)
557{
558 int i = 0x10000;
559 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
560
561 do {
562 autohandle += TC_H_MAKE(0x10000U, 0);
563 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
564 autohandle = TC_H_MAKE(0x80000000U, 0);
565 } while (qdisc_lookup(dev, autohandle) && --i > 0);
566
567 return i>0 ? autohandle : 0;
568}
569
David S. Miller99194cf2008-07-17 04:54:10 -0700570/* Attach toplevel qdisc to device queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
David S. Miller99194cf2008-07-17 04:54:10 -0700572static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
573 struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574{
David S. Miller8d50b532008-07-30 02:37:46 -0700575 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
David S. Miller53049972008-07-16 03:00:19 -0700576 spinlock_t *root_lock;
David S. Miller53049972008-07-16 03:00:19 -0700577
578 root_lock = qdisc_root_lock(oqdisc);
579 spin_lock_bh(root_lock);
580
David S. Miller8d50b532008-07-30 02:37:46 -0700581 /* Prune old scheduler */
582 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
583 qdisc_reset(oqdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584
David S. Miller8d50b532008-07-30 02:37:46 -0700585 /* ... and graft new one */
586 if (qdisc == NULL)
587 qdisc = &noop_qdisc;
588 dev_queue->qdisc_sleeping = qdisc;
589 dev_queue->qdisc = &noop_qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
David S. Miller53049972008-07-16 03:00:19 -0700591 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 return oqdisc;
594}
595
Patrick McHardy43effa12006-11-29 17:35:48 -0800596void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
597{
Eric Dumazet20fea082007-11-14 01:44:41 -0800598 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800599 unsigned long cl;
600 u32 parentid;
601
602 if (n == 0)
603 return;
604 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700605 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
606 return;
607
David S. Miller5ce2d482008-07-08 17:06:30 -0700608 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700609 if (sch == NULL) {
610 WARN_ON(parentid != TC_H_ROOT);
611 return;
612 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800613 cops = sch->ops->cl_ops;
614 if (cops->qlen_notify) {
615 cl = cops->get(sch, parentid);
616 cops->qlen_notify(sch, cl);
617 cops->put(sch, cl);
618 }
619 sch->q.qlen -= n;
620 }
621}
622EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
David S. Miller99194cf2008-07-17 04:54:10 -0700624static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
625 struct Qdisc *old, struct Qdisc *new)
626{
627 if (new || old)
628 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
David S. Miller99194cf2008-07-17 04:54:10 -0700630 if (old) {
631 spin_lock_bh(&old->q.lock);
632 qdisc_destroy(old);
633 spin_unlock_bh(&old->q.lock);
634 }
635}
636
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'
 * and 'n'.
 *
 * On success, destroy the old qdisc.
 */
645
646static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
David S. Miller99194cf2008-07-17 04:54:10 -0700647 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
648 struct Qdisc *new, struct Qdisc *old)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649{
David S. Miller99194cf2008-07-17 04:54:10 -0700650 struct Qdisc *q = old;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900653 if (parent == NULL) {
David S. Miller99194cf2008-07-17 04:54:10 -0700654 unsigned int i, num_q, ingress;
655
656 ingress = 0;
657 num_q = dev->num_tx_queues;
David S. Miller8d50b532008-07-30 02:37:46 -0700658 if ((q && q->flags & TCQ_F_INGRESS) ||
659 (new && new->flags & TCQ_F_INGRESS)) {
David S. Miller99194cf2008-07-17 04:54:10 -0700660 num_q = 1;
661 ingress = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 }
David S. Miller99194cf2008-07-17 04:54:10 -0700663
664 if (dev->flags & IFF_UP)
665 dev_deactivate(dev);
666
667 for (i = 0; i < num_q; i++) {
668 struct netdev_queue *dev_queue = &dev->rx_queue;
669
670 if (!ingress)
671 dev_queue = netdev_get_tx_queue(dev, i);
672
David S. Miller8d50b532008-07-30 02:37:46 -0700673 old = dev_graft_qdisc(dev_queue, new);
674 if (new && i > 0)
675 atomic_inc(&new->refcnt);
676
David S. Miller99194cf2008-07-17 04:54:10 -0700677 notify_and_destroy(skb, n, classid, old, new);
678 }
679
680 if (dev->flags & IFF_UP)
681 dev_activate(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800683 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
685 err = -EINVAL;
686
687 if (cops) {
688 unsigned long cl = cops->get(parent, classid);
689 if (cl) {
David S. Miller99194cf2008-07-17 04:54:10 -0700690 err = cops->graft(parent, cl, new, &old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 cops->put(parent, cl);
692 }
693 }
David S. Miller99194cf2008-07-17 04:54:10 -0700694 if (!err)
695 notify_and_destroy(skb, n, classid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 }
697 return err;
698}
699
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */
705
706static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700707qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
708 u32 parent, u32 handle, struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709{
710 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800711 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 struct Qdisc *sch;
713 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700714 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715
716 ops = qdisc_lookup_ops(kind);
717#ifdef CONFIG_KMOD
718 if (ops == NULL && kind != NULL) {
719 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800720 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 /* We dropped the RTNL semaphore in order to
722 * perform the module load. So, even if we
723 * succeeded in loading the module we have to
724 * tell the caller to replay the request. We
725 * indicate this using -EAGAIN.
726 * We replay the request because the device may
727 * go away in the mean time.
728 */
729 rtnl_unlock();
730 request_module("sch_%s", name);
731 rtnl_lock();
732 ops = qdisc_lookup_ops(kind);
733 if (ops != NULL) {
734 /* We will try again qdisc_lookup_ops,
735 * so don't keep a reference.
736 */
737 module_put(ops->owner);
738 err = -EAGAIN;
739 goto err_out;
740 }
741 }
742 }
743#endif
744
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700745 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 if (ops == NULL)
747 goto err_out;
748
David S. Miller5ce2d482008-07-08 17:06:30 -0700749 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700750 if (IS_ERR(sch)) {
751 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700755 sch->parent = parent;
756
Thomas Graf3d54b822005-07-05 14:15:09 -0700757 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700759 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700760 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700761 if (handle == 0) {
762 handle = qdisc_alloc_handle(dev);
763 err = -ENOMEM;
764 if (handle == 0)
765 goto err_out3;
766 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 }
768
Thomas Graf3d54b822005-07-05 14:15:09 -0700769 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
Patrick McHardy1e904742008-01-22 22:11:17 -0800771 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700772 if (tca[TCA_STAB]) {
773 stab = qdisc_get_stab(tca[TCA_STAB]);
774 if (IS_ERR(stab)) {
775 err = PTR_ERR(stab);
776 goto err_out3;
777 }
778 sch->stab = stab;
779 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800780 if (tca[TCA_RATE]) {
Thomas Graf023e09a2005-07-05 14:15:53 -0700781 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700782 qdisc_root_lock(sch),
Patrick McHardy1e904742008-01-22 22:11:17 -0800783 tca[TCA_RATE]);
Thomas Graf023e09a2005-07-05 14:15:53 -0700784 if (err) {
785 /*
786 * Any broken qdiscs that would require
787 * a ops->reset() here? The qdisc was never
788 * in action so it shouldn't be necessary.
789 */
790 if (ops->destroy)
791 ops->destroy(sch);
792 goto err_out3;
793 }
794 }
David S. Milleree7af822008-08-06 23:35:59 -0700795 if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
David S. Miller827ebd62008-08-07 20:26:40 -0700796 list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 return sch;
799 }
800err_out3:
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700801 qdisc_put_stab(sch->stab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700803 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804err_out2:
805 module_put(ops->owner);
806err_out:
807 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 return NULL;
809}
810
Patrick McHardy1e904742008-01-22 22:11:17 -0800811static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700813 struct qdisc_size_table *stab = NULL;
814 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700816 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 if (sch->ops->change == NULL)
818 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800819 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 if (err)
821 return err;
822 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700823
824 if (tca[TCA_STAB]) {
825 stab = qdisc_get_stab(tca[TCA_STAB]);
826 if (IS_ERR(stab))
827 return PTR_ERR(stab);
828 }
829
830 qdisc_put_stab(sch->stab);
831 sch->stab = stab;
832
Patrick McHardy1e904742008-01-22 22:11:17 -0800833 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 gen_replace_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700835 qdisc_root_lock(sch), tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 return 0;
837}
838
839struct check_loop_arg
840{
841 struct qdisc_walker w;
842 struct Qdisc *p;
843 int depth;
844};
845
846static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
847
848static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
849{
850 struct check_loop_arg arg;
851
852 if (q->ops->cl_ops == NULL)
853 return 0;
854
855 arg.w.stop = arg.w.skip = arg.w.count = 0;
856 arg.w.fn = check_loop_fn;
857 arg.depth = depth;
858 arg.p = p;
859 q->ops->cl_ops->walk(q, &arg.w);
860 return arg.w.stop ? -ELOOP : 0;
861}
862
863static int
864check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
865{
866 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800867 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 struct check_loop_arg *arg = (struct check_loop_arg *)w;
869
870 leaf = cops->leaf(q, cl);
871 if (leaf) {
872 if (leaf == arg->p || arg->depth > 7)
873 return -ELOOP;
874 return check_loop(leaf, arg->p, arg->depth + 1);
875 }
876 return 0;
877}
878
879/*
880 * Delete/get qdisc.
881 */
882
883static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
884{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900885 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800887 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 struct net_device *dev;
889 u32 clid = tcm->tcm_parent;
890 struct Qdisc *q = NULL;
891 struct Qdisc *p = NULL;
892 int err;
893
Denis V. Lunevb8542722007-12-01 00:21:31 +1100894 if (net != &init_net)
895 return -EINVAL;
896
Eric W. Biederman881d9662007-09-17 11:56:21 -0700897 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 return -ENODEV;
899
Patrick McHardy1e904742008-01-22 22:11:17 -0800900 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
901 if (err < 0)
902 return err;
903
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 if (clid) {
905 if (clid != TC_H_ROOT) {
906 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
907 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
908 return -ENOENT;
909 q = qdisc_leaf(p, clid);
910 } else { /* ingress */
David S. Miller816f3252008-07-08 22:49:00 -0700911 q = dev->rx_queue.qdisc;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900912 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700914 struct netdev_queue *dev_queue;
915 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700916 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 }
918 if (!q)
919 return -ENOENT;
920
921 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
922 return -EINVAL;
923 } else {
924 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
925 return -ENOENT;
926 }
927
Patrick McHardy1e904742008-01-22 22:11:17 -0800928 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 return -EINVAL;
930
931 if (n->nlmsg_type == RTM_DELQDISC) {
932 if (!clid)
933 return -EINVAL;
934 if (q->handle == 0)
935 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -0700936 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938 } else {
939 qdisc_notify(skb, n, clid, NULL, q);
940 }
941 return 0;
942}
943
944/*
945 Create/change qdisc.
946 */
947
948static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
949{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900950 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -0800952 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 struct net_device *dev;
954 u32 clid;
955 struct Qdisc *q, *p;
956 int err;
957
Denis V. Lunevb8542722007-12-01 00:21:31 +1100958 if (net != &init_net)
959 return -EINVAL;
960
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961replay:
962 /* Reinit, just in case something touches this. */
963 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 clid = tcm->tcm_parent;
965 q = p = NULL;
966
Eric W. Biederman881d9662007-09-17 11:56:21 -0700967 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 return -ENODEV;
969
Patrick McHardy1e904742008-01-22 22:11:17 -0800970 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
971 if (err < 0)
972 return err;
973
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (clid) {
975 if (clid != TC_H_ROOT) {
976 if (clid != TC_H_INGRESS) {
977 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
978 return -ENOENT;
979 q = qdisc_leaf(p, clid);
980 } else { /*ingress */
David S. Miller816f3252008-07-08 22:49:00 -0700981 q = dev->rx_queue.qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 }
983 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700984 struct netdev_queue *dev_queue;
985 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700986 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 }
988
989 /* It may be default qdisc, ignore it */
990 if (q && q->handle == 0)
991 q = NULL;
992
993 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
994 if (tcm->tcm_handle) {
995 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
996 return -EEXIST;
997 if (TC_H_MIN(tcm->tcm_handle))
998 return -EINVAL;
999 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1000 goto create_n_graft;
1001 if (n->nlmsg_flags&NLM_F_EXCL)
1002 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001003 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 return -EINVAL;
1005 if (q == p ||
1006 (p && check_loop(q, p, 0)))
1007 return -ELOOP;
1008 atomic_inc(&q->refcnt);
1009 goto graft;
1010 } else {
1011 if (q == NULL)
1012 goto create_n_graft;
1013
			/* This magic test requires explanation.
			 *
			 * We know that some child qdisc is already
			 * attached to this parent and we have a choice:
			 * either change it or create/graft a new one.
			 *
			 * 1. We are allowed to create/graft only
			 * if both the CREATE and REPLACE flags are set.
			 *
			 * 2. If EXCL is set, the requestor asserted that
			 * the qdisc tcm_handle is not expected to exist,
			 * so we choose create/graft as well.
			 *
			 * 3. The last case is when no flags are set.
			 * Alas, this is something of a hole in the API; we
			 * cannot decide what to do unambiguously.
			 * For now we select create/graft if the user
			 * gave a KIND that does not match the existing one.
			 */
1033 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1034 (n->nlmsg_flags&NLM_F_REPLACE) &&
1035 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001036 (tca[TCA_KIND] &&
1037 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 goto create_n_graft;
1039 }
1040 }
1041 } else {
1042 if (!tcm->tcm_handle)
1043 return -EINVAL;
1044 q = qdisc_lookup(dev, tcm->tcm_handle);
1045 }
1046
1047 /* Change qdisc parameters */
1048 if (q == NULL)
1049 return -ENOENT;
1050 if (n->nlmsg_flags&NLM_F_EXCL)
1051 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001052 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 return -EINVAL;
1054 err = qdisc_change(q, tca);
1055 if (err == 0)
1056 qdisc_notify(skb, n, clid, NULL, q);
1057 return err;
1058
1059create_n_graft:
1060 if (!(n->nlmsg_flags&NLM_F_CREATE))
1061 return -ENOENT;
1062 if (clid == TC_H_INGRESS)
David S. Millerbb949fb2008-07-08 16:55:56 -07001063 q = qdisc_create(dev, &dev->rx_queue,
1064 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001065 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001066 else
David S. Millere8a04642008-07-17 00:34:19 -07001067 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
David S. Millerbb949fb2008-07-08 16:55:56 -07001068 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001069 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 if (q == NULL) {
1071 if (err == -EAGAIN)
1072 goto replay;
1073 return err;
1074 }
1075
1076graft:
1077 if (1) {
David S. Miller53049972008-07-16 03:00:19 -07001078 spinlock_t *root_lock;
1079
David S. Miller99194cf2008-07-17 04:54:10 -07001080 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (err) {
1082 if (q) {
David S. Miller53049972008-07-16 03:00:19 -07001083 root_lock = qdisc_root_lock(q);
1084 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 qdisc_destroy(q);
David S. Miller53049972008-07-16 03:00:19 -07001086 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 }
1088 return err;
1089 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 }
1091 return 0;
1092}
1093
1094static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001095 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096{
1097 struct tcmsg *tcm;
1098 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001099 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 struct gnet_dump d;
1101
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001102 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 tcm = NLMSG_DATA(nlh);
1104 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001105 tcm->tcm__pad1 = 0;
1106 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001107 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 tcm->tcm_parent = clid;
1109 tcm->tcm_handle = q->handle;
1110 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001111 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001113 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 q->qstats.qlen = q->q.qlen;
1115
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001116 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1117 goto nla_put_failure;
1118
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001120 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001121 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122
1123 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001124 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125
1126 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001129 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001130
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001132 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001133
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001134 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 return skb->len;
1136
1137nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001138nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001139 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 return -1;
1141}
1142
1143static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1144 u32 clid, struct Qdisc *old, struct Qdisc *new)
1145{
1146 struct sk_buff *skb;
1147 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1148
1149 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1150 if (!skb)
1151 return -ENOBUFS;
1152
1153 if (old && old->handle) {
1154 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1155 goto err_out;
1156 }
1157 if (new) {
1158 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1159 goto err_out;
1160 }
1161
1162 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001163 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165err_out:
1166 kfree_skb(skb);
1167 return -EINVAL;
1168}
1169
David S. Miller30723672008-07-18 22:50:15 -07001170static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1171{
1172 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1173}
1174
1175static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1176 struct netlink_callback *cb,
1177 int *q_idx_p, int s_q_idx)
1178{
1179 int ret = 0, q_idx = *q_idx_p;
1180 struct Qdisc *q;
1181
1182 if (!root)
1183 return 0;
1184
1185 q = root;
1186 if (q_idx < s_q_idx) {
1187 q_idx++;
1188 } else {
1189 if (!tc_qdisc_dump_ignore(q) &&
1190 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1191 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1192 goto done;
1193 q_idx++;
1194 }
1195 list_for_each_entry(q, &root->list, list) {
1196 if (q_idx < s_q_idx) {
1197 q_idx++;
1198 continue;
1199 }
1200 if (!tc_qdisc_dump_ignore(q) &&
1201 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1202 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1203 goto done;
1204 q_idx++;
1205 }
1206
1207out:
1208 *q_idx_p = q_idx;
1209 return ret;
1210done:
1211 ret = -1;
1212 goto out;
1213}
1214
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1216{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001217 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 int idx, q_idx;
1219 int s_idx, s_q_idx;
1220 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
Denis V. Lunevb8542722007-12-01 00:21:31 +11001222 if (net != &init_net)
1223 return 0;
1224
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 s_idx = cb->args[0];
1226 s_q_idx = q_idx = cb->args[1];
1227 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07001228 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001229 for_each_netdev(&init_net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001230 struct netdev_queue *dev_queue;
1231
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001233 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 if (idx > s_idx)
1235 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001237
1238 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller827ebd62008-08-07 20:26:40 -07001239 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001240 goto done;
1241
1242 dev_queue = &dev->rx_queue;
David S. Miller827ebd62008-08-07 20:26:40 -07001243 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001244 goto done;
1245
Pavel Emelianov7562f872007-05-03 15:13:45 -07001246cont:
1247 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 }
1249
1250done:
1251 read_unlock(&dev_base_lock);
1252
1253 cb->args[0] = idx;
1254 cb->args[1] = q_idx;
1255
1256 return skb->len;
1257}
1258
1259
1260
1261/************************************************
1262 * Traffic classes manipulation. *
1263 ************************************************/
1264
1265
1266
1267static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1268{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001269 struct net *net = sock_net(skb->sk);
David S. Millerb0e1e642008-07-08 17:42:10 -07001270 struct netdev_queue *dev_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001272 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 struct net_device *dev;
1274 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001275 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276 unsigned long cl = 0;
1277 unsigned long new_cl;
1278 u32 pid = tcm->tcm_parent;
1279 u32 clid = tcm->tcm_handle;
1280 u32 qid = TC_H_MAJ(clid);
1281 int err;
1282
Denis V. Lunevb8542722007-12-01 00:21:31 +11001283 if (net != &init_net)
1284 return -EINVAL;
1285
Eric W. Biederman881d9662007-09-17 11:56:21 -07001286 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 return -ENODEV;
1288
Patrick McHardy1e904742008-01-22 22:11:17 -08001289 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1290 if (err < 0)
1291 return err;
1292
	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - clear.
	   handle == X:0         - root class.
	 */
1305
1306 /* Step 1. Determine qdisc handle X:0 */
1307
David S. Millere8a04642008-07-17 00:34:19 -07001308 dev_queue = netdev_get_tx_queue(dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 if (pid != TC_H_ROOT) {
1310 u32 qid1 = TC_H_MAJ(pid);
1311
1312 if (qid && qid1) {
1313 /* If both majors are known, they must be identical. */
1314 if (qid != qid1)
1315 return -EINVAL;
1316 } else if (qid1) {
1317 qid = qid1;
1318 } else if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001319 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
		/* Now qid is a genuine qdisc handle, consistent
		   with both parent and child.

		   TC_H_MAJ(pid) may still be unspecified; complete it now.
		 */
1326 if (pid)
1327 pid = TC_H_MAKE(qid, pid);
1328 } else {
1329 if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001330 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 }
1332
1333 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001334 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335 return -ENOENT;
1336
	/* And check that it supports classes */
1338 cops = q->ops->cl_ops;
1339 if (cops == NULL)
1340 return -EINVAL;
1341
1342 /* Now try to get class */
1343 if (clid == 0) {
1344 if (pid == TC_H_ROOT)
1345 clid = qid;
1346 } else
1347 clid = TC_H_MAKE(qid, clid);
1348
1349 if (clid)
1350 cl = cops->get(q, clid);
1351
1352 if (cl == 0) {
1353 err = -ENOENT;
1354 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1355 goto out;
1356 } else {
1357 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001358 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 err = -EEXIST;
1360 if (n->nlmsg_flags&NLM_F_EXCL)
1361 goto out;
1362 break;
1363 case RTM_DELTCLASS:
1364 err = cops->delete(q, cl);
1365 if (err == 0)
1366 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1367 goto out;
1368 case RTM_GETTCLASS:
1369 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1370 goto out;
1371 default:
1372 err = -EINVAL;
1373 goto out;
1374 }
1375 }
1376
1377 new_cl = cl;
1378 err = cops->change(q, clid, pid, tca, &new_cl);
1379 if (err == 0)
1380 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1381
1382out:
1383 if (cl)
1384 cops->put(q, cl);
1385
1386 return err;
1387}
1388
1389
1390static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1391 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001392 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393{
1394 struct tcmsg *tcm;
1395 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001396 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001398 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001400 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401 tcm = NLMSG_DATA(nlh);
1402 tcm->tcm_family = AF_UNSPEC;
David S. Miller5ce2d482008-07-08 17:06:30 -07001403 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 tcm->tcm_parent = q->handle;
1405 tcm->tcm_handle = q->handle;
1406 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001407 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001409 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
1411 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001412 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001413 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001416 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417
1418 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001419 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001421 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 return skb->len;
1423
1424nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001425nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001426 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 return -1;
1428}
1429
1430static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1431 struct Qdisc *q, unsigned long cl, int event)
1432{
1433 struct sk_buff *skb;
1434 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1435
1436 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1437 if (!skb)
1438 return -ENOBUFS;
1439
1440 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1441 kfree_skb(skb);
1442 return -EINVAL;
1443 }
1444
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001445 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446}
1447
1448struct qdisc_dump_args
1449{
1450 struct qdisc_walker w;
1451 struct sk_buff *skb;
1452 struct netlink_callback *cb;
1453};
1454
1455static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1456{
1457 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1458
1459 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1460 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1461}
1462
David S. Miller30723672008-07-18 22:50:15 -07001463static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1464 struct tcmsg *tcm, struct netlink_callback *cb,
1465 int *t_p, int s_t)
1466{
1467 struct qdisc_dump_args arg;
1468
1469 if (tc_qdisc_dump_ignore(q) ||
1470 *t_p < s_t || !q->ops->cl_ops ||
1471 (tcm->tcm_parent &&
1472 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1473 (*t_p)++;
1474 return 0;
1475 }
1476 if (*t_p > s_t)
1477 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1478 arg.w.fn = qdisc_class_dump;
1479 arg.skb = skb;
1480 arg.cb = cb;
1481 arg.w.stop = 0;
1482 arg.w.skip = cb->args[1];
1483 arg.w.count = 0;
1484 q->ops->cl_ops->walk(q, &arg.w);
1485 cb->args[1] = arg.w.count;
1486 if (arg.w.stop)
1487 return -1;
1488 (*t_p)++;
1489 return 0;
1490}
1491
1492static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1493 struct tcmsg *tcm, struct netlink_callback *cb,
1494 int *t_p, int s_t)
1495{
1496 struct Qdisc *q;
1497
1498 if (!root)
1499 return 0;
1500
1501 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1502 return -1;
1503
1504 list_for_each_entry(q, &root->list, list) {
1505 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1506 return -1;
1507 }
1508
1509 return 0;
1510}
1511
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1513{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001515 struct net *net = sock_net(skb->sk);
1516 struct netdev_queue *dev_queue;
1517 struct net_device *dev;
1518 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
Denis V. Lunevb8542722007-12-01 00:21:31 +11001520 if (net != &init_net)
1521 return 0;
1522
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1524 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001525 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526 return 0;
1527
1528 s_t = cb->args[0];
1529 t = 0;
1530
David S. Miller30723672008-07-18 22:50:15 -07001531 dev_queue = netdev_get_tx_queue(dev, 0);
1532 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1533 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534
David S. Miller30723672008-07-18 22:50:15 -07001535 dev_queue = &dev->rx_queue;
1536 if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
1537 goto done;
1538
1539done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 cb->args[0] = t;
1541
1542 dev_put(dev);
1543 return skb->len;
1544}
1545
/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for the protocol and asks the
   specific classifiers.
 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001550int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1551 struct tcf_result *res)
1552{
1553 __be16 protocol = skb->protocol;
1554 int err = 0;
1555
1556 for (; tp; tp = tp->next) {
1557 if ((tp->protocol == protocol ||
1558 tp->protocol == htons(ETH_P_ALL)) &&
1559 (err = tp->classify(skb, tp, res)) >= 0) {
1560#ifdef CONFIG_NET_CLS_ACT
1561 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1562 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1563#endif
1564 return err;
1565 }
1566 }
1567 return -1;
1568}
1569EXPORT_SYMBOL(tc_classify_compat);
1570
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001572 struct tcf_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573{
1574 int err = 0;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001575 __be16 protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576#ifdef CONFIG_NET_CLS_ACT
1577 struct tcf_proto *otp = tp;
1578reclassify:
1579#endif
1580 protocol = skb->protocol;
1581
Patrick McHardy73ca4912007-07-15 00:02:31 -07001582 err = tc_classify_compat(skb, tp, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy73ca4912007-07-15 00:02:31 -07001584 if (err == TC_ACT_RECLASSIFY) {
1585 u32 verd = G_TC_VERD(skb->tc_verd);
1586 tp = otp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587
Patrick McHardy73ca4912007-07-15 00:02:31 -07001588 if (verd++ >= MAX_REC_LOOP) {
1589 printk("rule prio %u protocol %02x reclassify loop, "
1590 "packet dropped\n",
1591 tp->prio&0xffff, ntohs(tp->protocol));
1592 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001594 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
1595 goto reclassify;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001597#endif
1598 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599}
Patrick McHardy73ca4912007-07-15 00:02:31 -07001600EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601
Patrick McHardya48b5a62007-03-23 11:29:43 -07001602void tcf_destroy(struct tcf_proto *tp)
1603{
1604 tp->ops->destroy(tp);
1605 module_put(tp->ops->owner);
1606 kfree(tp);
1607}
1608
Patrick McHardyff31ab52008-07-01 19:52:38 -07001609void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001610{
1611 struct tcf_proto *tp;
1612
Patrick McHardyff31ab52008-07-01 19:52:38 -07001613 while ((tp = *fl) != NULL) {
1614 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001615 tcf_destroy(tp);
1616 }
1617}
1618EXPORT_SYMBOL(tcf_destroy_chain);
1619
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620#ifdef CONFIG_PROC_FS
1621static int psched_show(struct seq_file *seq, void *v)
1622{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001623 struct timespec ts;
1624
1625 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 seq_printf(seq, "%08x %08x %08x %08x\n",
Patrick McHardy641b9e02007-03-16 01:18:42 -07001627 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001628 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001629 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630
1631 return 0;
1632}
1633
1634static int psched_open(struct inode *inode, struct file *file)
1635{
1636 return single_open(file, psched_show, PDE(inode)->data);
1637}
1638
Arjan van de Venda7071d2007-02-12 00:55:36 -08001639static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 .owner = THIS_MODULE,
1641 .open = psched_open,
1642 .read = seq_read,
1643 .llseek = seq_lseek,
1644 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001645};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646#endif
1647
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648static int __init pktsched_init(void)
1649{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 register_qdisc(&pfifo_qdisc_ops);
1651 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001652 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653
Thomas Grafbe577dd2007-03-22 11:55:50 -07001654 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1655 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1656 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1657 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1658 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1659 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1660
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 return 0;
1662}
1663
1664subsys_initcall(pktsched_init);