blob: c25465e5607aeb32cf39fb8c784514a64ebedcbf [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020031#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110032#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070033#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <net/pkt_sched.h>
35
Linus Torvalds1da177e2005-04-16 15:20:36 -070036static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
51 Generally, queueing discipline ("qdisc") is a black box,
52 which is able to enqueue packets and to dequeue them (when
53 device is ready to send something) in order and at times
54 determined by algorithm hidden in it.
55
   Qdiscs are divided into two categories:
57 - "queues", which have no internal structure visible from outside.
58 - "schedulers", which split all the packets to "traffic classes",
59 using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
   The goal of the routines in this file is to translate
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
78 dequeue usually returns a skb to send. It is allowed to return NULL,
79 but it does not mean that queue is empty, it just means that
80 discipline does not want to send anything this time.
81 Queue is really empty if q->q.qlen == 0.
82 For complicated disciplines with multiple queues q->q is not
83 real packet queue, but however q->q.qlen must be valid.
84
85 ---enqueue
86
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
90 NET_XMIT_DROP - this packet dropped
91 Expected action: do not backoff, but wait until queue will clear.
92 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
93 Expected action: backoff or ignore
94 NET_XMIT_POLICED - dropped by police.
95 Expected action: backoff or error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
   requeues a packet that has already been dequeued. It is used for
   non-standard or just buggy devices, which can defer output even if
   netif_queue_stopped()=0.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800161EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800180EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
David S. Miller8123b422008-08-08 23:23:39 -0700186struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
187{
188 struct Qdisc *q;
189
190 if (!(root->flags & TCQ_F_BUILTIN) &&
191 root->handle == handle)
192 return root;
193
194 list_for_each_entry(q, &root->list, list) {
195 if (q->handle == handle)
196 return q;
197 }
198 return NULL;
199}
200
David S. Milleread81cc2008-07-17 00:50:32 -0700201struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800202{
David S. Miller30723672008-07-18 22:50:15 -0700203 unsigned int i;
Patrick McHardy43effa12006-11-29 17:35:48 -0800204
David S. Miller30723672008-07-18 22:50:15 -0700205 for (i = 0; i < dev->num_tx_queues; i++) {
206 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
David S. Miller827ebd62008-08-07 20:26:40 -0700207 struct Qdisc *q, *txq_root = txq->qdisc_sleeping;
David S. Miller30723672008-07-18 22:50:15 -0700208
David S. Miller8123b422008-08-08 23:23:39 -0700209 q = qdisc_match_from_root(txq_root, handle);
210 if (q)
211 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800212 }
David S. Miller8123b422008-08-08 23:23:39 -0700213 return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
Patrick McHardy43effa12006-11-29 17:35:48 -0800214}
215
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
217{
218 unsigned long cl;
219 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800220 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
222 if (cops == NULL)
223 return NULL;
224 cl = cops->get(p, classid);
225
226 if (cl == 0)
227 return NULL;
228 leaf = cops->leaf(p, cl);
229 cops->put(p, cl);
230 return leaf;
231}
232
233/* Find queueing discipline by name */
234
Patrick McHardy1e904742008-01-22 22:11:17 -0800235static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236{
237 struct Qdisc_ops *q = NULL;
238
239 if (kind) {
240 read_lock(&qdisc_mod_lock);
241 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800242 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 if (!try_module_get(q->owner))
244 q = NULL;
245 break;
246 }
247 }
248 read_unlock(&qdisc_mod_lock);
249 }
250 return q;
251}
252
253static struct qdisc_rate_table *qdisc_rtab_list;
254
Patrick McHardy1e904742008-01-22 22:11:17 -0800255struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256{
257 struct qdisc_rate_table *rtab;
258
259 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
260 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
261 rtab->refcnt++;
262 return rtab;
263 }
264 }
265
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800266 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
267 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 return NULL;
269
270 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
271 if (rtab) {
272 rtab->rate = *r;
273 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800274 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 rtab->next = qdisc_rtab_list;
276 qdisc_rtab_list = rtab;
277 }
278 return rtab;
279}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800280EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281
282void qdisc_put_rtab(struct qdisc_rate_table *tab)
283{
284 struct qdisc_rate_table *rtab, **rtabp;
285
286 if (!tab || --tab->refcnt)
287 return;
288
289 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
290 if (rtab == tab) {
291 *rtabp = rtab->next;
292 kfree(rtab);
293 return;
294 }
295 }
296}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800297EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700299static LIST_HEAD(qdisc_stab_list);
300static DEFINE_SPINLOCK(qdisc_stab_lock);
301
302static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
303 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
304 [TCA_STAB_DATA] = { .type = NLA_BINARY },
305};
306
307static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
308{
309 struct nlattr *tb[TCA_STAB_MAX + 1];
310 struct qdisc_size_table *stab;
311 struct tc_sizespec *s;
312 unsigned int tsize = 0;
313 u16 *tab = NULL;
314 int err;
315
316 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
317 if (err < 0)
318 return ERR_PTR(err);
319 if (!tb[TCA_STAB_BASE])
320 return ERR_PTR(-EINVAL);
321
322 s = nla_data(tb[TCA_STAB_BASE]);
323
324 if (s->tsize > 0) {
325 if (!tb[TCA_STAB_DATA])
326 return ERR_PTR(-EINVAL);
327 tab = nla_data(tb[TCA_STAB_DATA]);
328 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
329 }
330
331 if (!s || tsize != s->tsize || (!tab && tsize > 0))
332 return ERR_PTR(-EINVAL);
333
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700334 spin_lock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700335
336 list_for_each_entry(stab, &qdisc_stab_list, list) {
337 if (memcmp(&stab->szopts, s, sizeof(*s)))
338 continue;
339 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
340 continue;
341 stab->refcnt++;
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700342 spin_unlock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700343 return stab;
344 }
345
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700346 spin_unlock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700347
348 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
349 if (!stab)
350 return ERR_PTR(-ENOMEM);
351
352 stab->refcnt = 1;
353 stab->szopts = *s;
354 if (tsize > 0)
355 memcpy(stab->data, tab, tsize * sizeof(u16));
356
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700357 spin_lock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700358 list_add_tail(&stab->list, &qdisc_stab_list);
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700359 spin_unlock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700360
361 return stab;
362}
363
364void qdisc_put_stab(struct qdisc_size_table *tab)
365{
366 if (!tab)
367 return;
368
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700369 spin_lock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700370
371 if (--tab->refcnt == 0) {
372 list_del(&tab->list);
373 kfree(tab);
374 }
375
Jarek Poplawski1cfa2662008-08-11 18:11:06 -0700376 spin_unlock_bh(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700377}
378EXPORT_SYMBOL(qdisc_put_stab);
379
380static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
381{
382 struct nlattr *nest;
383
384 nest = nla_nest_start(skb, TCA_STAB);
385 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
386 nla_nest_end(skb, nest);
387
388 return skb->len;
389
390nla_put_failure:
391 return -1;
392}
393
394void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
395{
396 int pkt_len, slot;
397
398 pkt_len = skb->len + stab->szopts.overhead;
399 if (unlikely(!stab->szopts.tsize))
400 goto out;
401
402 slot = pkt_len + stab->szopts.cell_align;
403 if (unlikely(slot < 0))
404 slot = 0;
405
406 slot >>= stab->szopts.cell_log;
407 if (likely(slot < stab->szopts.tsize))
408 pkt_len = stab->data[slot];
409 else
410 pkt_len = stab->data[stab->szopts.tsize - 1] *
411 (slot / stab->szopts.tsize) +
412 stab->data[slot % stab->szopts.tsize];
413
414 pkt_len <<= stab->szopts.size_log;
415out:
416 if (unlikely(pkt_len < 1))
417 pkt_len = 1;
418 qdisc_skb_cb(skb)->pkt_len = pkt_len;
419}
420EXPORT_SYMBOL(qdisc_calculate_pkt_len);
421
Patrick McHardy41794772007-03-16 01:19:15 -0700422static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
423{
424 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
425 timer);
426
427 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700428 smp_wmb();
David S. Miller37437bb2008-07-16 02:15:04 -0700429 __netif_schedule(wd->qdisc);
Stephen Hemminger19365022007-03-22 12:18:35 -0700430
Patrick McHardy41794772007-03-16 01:19:15 -0700431 return HRTIMER_NORESTART;
432}
433
434void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
435{
436 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
437 wd->timer.function = qdisc_watchdog;
438 wd->qdisc = qdisc;
439}
440EXPORT_SYMBOL(qdisc_watchdog_init);
441
442void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
443{
444 ktime_t time;
445
446 wd->qdisc->flags |= TCQ_F_THROTTLED;
447 time = ktime_set(0, 0);
448 time = ktime_add_ns(time, PSCHED_US2NS(expires));
449 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
450}
451EXPORT_SYMBOL(qdisc_watchdog_schedule);
452
453void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
454{
455 hrtimer_cancel(&wd->timer);
456 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
457}
458EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459
Adrian Bunka94f7792008-07-22 14:20:11 -0700460static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700461{
462 unsigned int size = n * sizeof(struct hlist_head), i;
463 struct hlist_head *h;
464
465 if (size <= PAGE_SIZE)
466 h = kmalloc(size, GFP_KERNEL);
467 else
468 h = (struct hlist_head *)
469 __get_free_pages(GFP_KERNEL, get_order(size));
470
471 if (h != NULL) {
472 for (i = 0; i < n; i++)
473 INIT_HLIST_HEAD(&h[i]);
474 }
475 return h;
476}
477
478static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
479{
480 unsigned int size = n * sizeof(struct hlist_head);
481
482 if (size <= PAGE_SIZE)
483 kfree(h);
484 else
485 free_pages((unsigned long)h, get_order(size));
486}
487
488void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
489{
490 struct Qdisc_class_common *cl;
491 struct hlist_node *n, *next;
492 struct hlist_head *nhash, *ohash;
493 unsigned int nsize, nmask, osize;
494 unsigned int i, h;
495
496 /* Rehash when load factor exceeds 0.75 */
497 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
498 return;
499 nsize = clhash->hashsize * 2;
500 nmask = nsize - 1;
501 nhash = qdisc_class_hash_alloc(nsize);
502 if (nhash == NULL)
503 return;
504
505 ohash = clhash->hash;
506 osize = clhash->hashsize;
507
508 sch_tree_lock(sch);
509 for (i = 0; i < osize; i++) {
510 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
511 h = qdisc_class_hash(cl->classid, nmask);
512 hlist_add_head(&cl->hnode, &nhash[h]);
513 }
514 }
515 clhash->hash = nhash;
516 clhash->hashsize = nsize;
517 clhash->hashmask = nmask;
518 sch_tree_unlock(sch);
519
520 qdisc_class_hash_free(ohash, osize);
521}
522EXPORT_SYMBOL(qdisc_class_hash_grow);
523
524int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
525{
526 unsigned int size = 4;
527
528 clhash->hash = qdisc_class_hash_alloc(size);
529 if (clhash->hash == NULL)
530 return -ENOMEM;
531 clhash->hashsize = size;
532 clhash->hashmask = size - 1;
533 clhash->hashelems = 0;
534 return 0;
535}
536EXPORT_SYMBOL(qdisc_class_hash_init);
537
538void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
539{
540 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
541}
542EXPORT_SYMBOL(qdisc_class_hash_destroy);
543
544void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
545 struct Qdisc_class_common *cl)
546{
547 unsigned int h;
548
549 INIT_HLIST_NODE(&cl->hnode);
550 h = qdisc_class_hash(cl->classid, clhash->hashmask);
551 hlist_add_head(&cl->hnode, &clhash->hash[h]);
552 clhash->hashelems++;
553}
554EXPORT_SYMBOL(qdisc_class_hash_insert);
555
556void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
557 struct Qdisc_class_common *cl)
558{
559 hlist_del(&cl->hnode);
560 clhash->hashelems--;
561}
562EXPORT_SYMBOL(qdisc_class_hash_remove);
563
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564/* Allocate an unique handle from space managed by kernel */
565
566static u32 qdisc_alloc_handle(struct net_device *dev)
567{
568 int i = 0x10000;
569 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
570
571 do {
572 autohandle += TC_H_MAKE(0x10000U, 0);
573 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
574 autohandle = TC_H_MAKE(0x80000000U, 0);
575 } while (qdisc_lookup(dev, autohandle) && --i > 0);
576
577 return i>0 ? autohandle : 0;
578}
579
David S. Miller99194cf2008-07-17 04:54:10 -0700580/* Attach toplevel qdisc to device queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
David S. Miller99194cf2008-07-17 04:54:10 -0700582static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
583 struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584{
David S. Miller8d50b532008-07-30 02:37:46 -0700585 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
David S. Miller53049972008-07-16 03:00:19 -0700586 spinlock_t *root_lock;
David S. Miller53049972008-07-16 03:00:19 -0700587
588 root_lock = qdisc_root_lock(oqdisc);
589 spin_lock_bh(root_lock);
590
David S. Miller8d50b532008-07-30 02:37:46 -0700591 /* Prune old scheduler */
592 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
593 qdisc_reset(oqdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594
David S. Miller8d50b532008-07-30 02:37:46 -0700595 /* ... and graft new one */
596 if (qdisc == NULL)
597 qdisc = &noop_qdisc;
598 dev_queue->qdisc_sleeping = qdisc;
599 dev_queue->qdisc = &noop_qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
David S. Miller53049972008-07-16 03:00:19 -0700601 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 return oqdisc;
604}
605
Patrick McHardy43effa12006-11-29 17:35:48 -0800606void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
607{
Eric Dumazet20fea082007-11-14 01:44:41 -0800608 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800609 unsigned long cl;
610 u32 parentid;
611
612 if (n == 0)
613 return;
614 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700615 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
616 return;
617
David S. Miller5ce2d482008-07-08 17:06:30 -0700618 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700619 if (sch == NULL) {
620 WARN_ON(parentid != TC_H_ROOT);
621 return;
622 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800623 cops = sch->ops->cl_ops;
624 if (cops->qlen_notify) {
625 cl = cops->get(sch, parentid);
626 cops->qlen_notify(sch, cl);
627 cops->put(sch, cl);
628 }
629 sch->q.qlen -= n;
630 }
631}
632EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
David S. Miller99194cf2008-07-17 04:54:10 -0700634static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
635 struct Qdisc *old, struct Qdisc *new)
636{
637 if (new || old)
638 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
David S. Miller99194cf2008-07-17 04:54:10 -0700640 if (old) {
641 spin_lock_bh(&old->q.lock);
642 qdisc_destroy(old);
643 spin_unlock_bh(&old->q.lock);
644 }
645}
646
647/* Graft qdisc "new" to class "classid" of qdisc "parent" or
648 * to device "dev".
649 *
650 * When appropriate send a netlink notification using 'skb'
651 * and "n".
652 *
653 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 */
655
656static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
David S. Miller99194cf2008-07-17 04:54:10 -0700657 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
658 struct Qdisc *new, struct Qdisc *old)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659{
David S. Miller99194cf2008-07-17 04:54:10 -0700660 struct Qdisc *q = old;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900663 if (parent == NULL) {
David S. Miller99194cf2008-07-17 04:54:10 -0700664 unsigned int i, num_q, ingress;
665
666 ingress = 0;
667 num_q = dev->num_tx_queues;
David S. Miller8d50b532008-07-30 02:37:46 -0700668 if ((q && q->flags & TCQ_F_INGRESS) ||
669 (new && new->flags & TCQ_F_INGRESS)) {
David S. Miller99194cf2008-07-17 04:54:10 -0700670 num_q = 1;
671 ingress = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 }
David S. Miller99194cf2008-07-17 04:54:10 -0700673
674 if (dev->flags & IFF_UP)
675 dev_deactivate(dev);
676
677 for (i = 0; i < num_q; i++) {
678 struct netdev_queue *dev_queue = &dev->rx_queue;
679
680 if (!ingress)
681 dev_queue = netdev_get_tx_queue(dev, i);
682
David S. Miller8d50b532008-07-30 02:37:46 -0700683 old = dev_graft_qdisc(dev_queue, new);
684 if (new && i > 0)
685 atomic_inc(&new->refcnt);
686
David S. Miller99194cf2008-07-17 04:54:10 -0700687 notify_and_destroy(skb, n, classid, old, new);
688 }
689
690 if (dev->flags & IFF_UP)
691 dev_activate(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800693 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
695 err = -EINVAL;
696
697 if (cops) {
698 unsigned long cl = cops->get(parent, classid);
699 if (cl) {
David S. Miller99194cf2008-07-17 04:54:10 -0700700 err = cops->graft(parent, cl, new, &old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 cops->put(parent, cl);
702 }
703 }
David S. Miller99194cf2008-07-17 04:54:10 -0700704 if (!err)
705 notify_and_destroy(skb, n, classid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 }
707 return err;
708}
709
710/*
711 Allocate and initialize new qdisc.
712
713 Parameters are passed via opt.
714 */
715
716static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700717qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
718 u32 parent, u32 handle, struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719{
720 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800721 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 struct Qdisc *sch;
723 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700724 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
726 ops = qdisc_lookup_ops(kind);
727#ifdef CONFIG_KMOD
728 if (ops == NULL && kind != NULL) {
729 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800730 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 /* We dropped the RTNL semaphore in order to
732 * perform the module load. So, even if we
733 * succeeded in loading the module we have to
734 * tell the caller to replay the request. We
735 * indicate this using -EAGAIN.
736 * We replay the request because the device may
737 * go away in the mean time.
738 */
739 rtnl_unlock();
740 request_module("sch_%s", name);
741 rtnl_lock();
742 ops = qdisc_lookup_ops(kind);
743 if (ops != NULL) {
744 /* We will try again qdisc_lookup_ops,
745 * so don't keep a reference.
746 */
747 module_put(ops->owner);
748 err = -EAGAIN;
749 goto err_out;
750 }
751 }
752 }
753#endif
754
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700755 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (ops == NULL)
757 goto err_out;
758
David S. Miller5ce2d482008-07-08 17:06:30 -0700759 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700760 if (IS_ERR(sch)) {
761 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700763 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700765 sch->parent = parent;
766
Thomas Graf3d54b822005-07-05 14:15:09 -0700767 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700769 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700770 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700771 if (handle == 0) {
772 handle = qdisc_alloc_handle(dev);
773 err = -ENOMEM;
774 if (handle == 0)
775 goto err_out3;
776 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 }
778
Thomas Graf3d54b822005-07-05 14:15:09 -0700779 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Patrick McHardy1e904742008-01-22 22:11:17 -0800781 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700782 if (tca[TCA_STAB]) {
783 stab = qdisc_get_stab(tca[TCA_STAB]);
784 if (IS_ERR(stab)) {
785 err = PTR_ERR(stab);
786 goto err_out3;
787 }
788 sch->stab = stab;
789 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800790 if (tca[TCA_RATE]) {
Thomas Graf023e09a2005-07-05 14:15:53 -0700791 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700792 qdisc_root_lock(sch),
Patrick McHardy1e904742008-01-22 22:11:17 -0800793 tca[TCA_RATE]);
Thomas Graf023e09a2005-07-05 14:15:53 -0700794 if (err) {
795 /*
796 * Any broken qdiscs that would require
797 * a ops->reset() here? The qdisc was never
798 * in action so it shouldn't be necessary.
799 */
800 if (ops->destroy)
801 ops->destroy(sch);
802 goto err_out3;
803 }
804 }
David S. Milleree7af822008-08-06 23:35:59 -0700805 if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
David S. Miller827ebd62008-08-07 20:26:40 -0700806 list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 return sch;
809 }
810err_out3:
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700811 qdisc_put_stab(sch->stab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700813 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814err_out2:
815 module_put(ops->owner);
816err_out:
817 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 return NULL;
819}
820
Patrick McHardy1e904742008-01-22 22:11:17 -0800821static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700823 struct qdisc_size_table *stab = NULL;
824 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700826 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 if (sch->ops->change == NULL)
828 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800829 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 if (err)
831 return err;
832 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700833
834 if (tca[TCA_STAB]) {
835 stab = qdisc_get_stab(tca[TCA_STAB]);
836 if (IS_ERR(stab))
837 return PTR_ERR(stab);
838 }
839
840 qdisc_put_stab(sch->stab);
841 sch->stab = stab;
842
Patrick McHardy1e904742008-01-22 22:11:17 -0800843 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 gen_replace_estimator(&sch->bstats, &sch->rate_est,
David S. Miller7698b4f2008-07-16 01:42:40 -0700845 qdisc_root_lock(sch), tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 return 0;
847}
848
849struct check_loop_arg
850{
851 struct qdisc_walker w;
852 struct Qdisc *p;
853 int depth;
854};
855
856static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
857
858static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
859{
860 struct check_loop_arg arg;
861
862 if (q->ops->cl_ops == NULL)
863 return 0;
864
865 arg.w.stop = arg.w.skip = arg.w.count = 0;
866 arg.w.fn = check_loop_fn;
867 arg.depth = depth;
868 arg.p = p;
869 q->ops->cl_ops->walk(q, &arg.w);
870 return arg.w.stop ? -ELOOP : 0;
871}
872
873static int
874check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
875{
876 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800877 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 struct check_loop_arg *arg = (struct check_loop_arg *)w;
879
880 leaf = cops->leaf(q, cl);
881 if (leaf) {
882 if (leaf == arg->p || arg->depth > 7)
883 return -ELOOP;
884 return check_loop(leaf, arg->p, arg->depth + 1);
885 }
886 return 0;
887}
888
889/*
890 * Delete/get qdisc.
891 */
892
893static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
894{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900895 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800897 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 struct net_device *dev;
899 u32 clid = tcm->tcm_parent;
900 struct Qdisc *q = NULL;
901 struct Qdisc *p = NULL;
902 int err;
903
Denis V. Lunevb8542722007-12-01 00:21:31 +1100904 if (net != &init_net)
905 return -EINVAL;
906
Eric W. Biederman881d9662007-09-17 11:56:21 -0700907 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 return -ENODEV;
909
Patrick McHardy1e904742008-01-22 22:11:17 -0800910 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
911 if (err < 0)
912 return err;
913
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914 if (clid) {
915 if (clid != TC_H_ROOT) {
916 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
917 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
918 return -ENOENT;
919 q = qdisc_leaf(p, clid);
920 } else { /* ingress */
David S. Miller8123b422008-08-08 23:23:39 -0700921 q = dev->rx_queue.qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900922 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700924 struct netdev_queue *dev_queue;
925 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700926 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 }
928 if (!q)
929 return -ENOENT;
930
931 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
932 return -EINVAL;
933 } else {
934 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
935 return -ENOENT;
936 }
937
Patrick McHardy1e904742008-01-22 22:11:17 -0800938 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 return -EINVAL;
940
941 if (n->nlmsg_type == RTM_DELQDISC) {
942 if (!clid)
943 return -EINVAL;
944 if (q->handle == 0)
945 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -0700946 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 } else {
949 qdisc_notify(skb, n, clid, NULL, q);
950 }
951 return 0;
952}
953
/*
 * Create/change qdisc.
 */
957
958static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
959{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900960 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -0800962 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 struct net_device *dev;
964 u32 clid;
965 struct Qdisc *q, *p;
966 int err;
967
Denis V. Lunevb8542722007-12-01 00:21:31 +1100968 if (net != &init_net)
969 return -EINVAL;
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971replay:
972 /* Reinit, just in case something touches this. */
973 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 clid = tcm->tcm_parent;
975 q = p = NULL;
976
Eric W. Biederman881d9662007-09-17 11:56:21 -0700977 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 return -ENODEV;
979
Patrick McHardy1e904742008-01-22 22:11:17 -0800980 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
981 if (err < 0)
982 return err;
983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 if (clid) {
985 if (clid != TC_H_ROOT) {
986 if (clid != TC_H_INGRESS) {
987 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
988 return -ENOENT;
989 q = qdisc_leaf(p, clid);
990 } else { /*ingress */
David S. Miller8123b422008-08-08 23:23:39 -0700991 q = dev->rx_queue.qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 }
993 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700994 struct netdev_queue *dev_queue;
995 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700996 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 }
998
999 /* It may be default qdisc, ignore it */
1000 if (q && q->handle == 0)
1001 q = NULL;
1002
1003 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1004 if (tcm->tcm_handle) {
1005 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1006 return -EEXIST;
1007 if (TC_H_MIN(tcm->tcm_handle))
1008 return -EINVAL;
1009 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1010 goto create_n_graft;
1011 if (n->nlmsg_flags&NLM_F_EXCL)
1012 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001013 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 return -EINVAL;
1015 if (q == p ||
1016 (p && check_loop(q, p, 0)))
1017 return -ELOOP;
1018 atomic_inc(&q->refcnt);
1019 goto graft;
1020 } else {
1021 if (q == NULL)
1022 goto create_n_graft;
1023
1024 /* This magic test requires explanation.
1025 *
1026 * We know, that some child q is already
1027 * attached to this parent and have choice:
1028 * either to change it or to create/graft new one.
1029 *
1030 * 1. We are allowed to create/graft only
1031 * if CREATE and REPLACE flags are set.
1032 *
1033 * 2. If EXCL is set, requestor wanted to say,
1034 * that qdisc tcm_handle is not expected
1035 * to exist, so that we choose create/graft too.
1036 *
1037 * 3. The last case is when no flags are set.
1038 * Alas, it is sort of hole in API, we
1039 * cannot decide what to do unambiguously.
1040 * For now we select create/graft, if
1041 * user gave KIND, which does not match existing.
1042 */
1043 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1044 (n->nlmsg_flags&NLM_F_REPLACE) &&
1045 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001046 (tca[TCA_KIND] &&
1047 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 goto create_n_graft;
1049 }
1050 }
1051 } else {
1052 if (!tcm->tcm_handle)
1053 return -EINVAL;
1054 q = qdisc_lookup(dev, tcm->tcm_handle);
1055 }
1056
1057 /* Change qdisc parameters */
1058 if (q == NULL)
1059 return -ENOENT;
1060 if (n->nlmsg_flags&NLM_F_EXCL)
1061 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001062 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 return -EINVAL;
1064 err = qdisc_change(q, tca);
1065 if (err == 0)
1066 qdisc_notify(skb, n, clid, NULL, q);
1067 return err;
1068
1069create_n_graft:
1070 if (!(n->nlmsg_flags&NLM_F_CREATE))
1071 return -ENOENT;
1072 if (clid == TC_H_INGRESS)
David S. Millerbb949fb2008-07-08 16:55:56 -07001073 q = qdisc_create(dev, &dev->rx_queue,
1074 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001075 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001076 else
David S. Millere8a04642008-07-17 00:34:19 -07001077 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
David S. Millerbb949fb2008-07-08 16:55:56 -07001078 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001079 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 if (q == NULL) {
1081 if (err == -EAGAIN)
1082 goto replay;
1083 return err;
1084 }
1085
1086graft:
1087 if (1) {
David S. Miller53049972008-07-16 03:00:19 -07001088 spinlock_t *root_lock;
1089
David S. Miller99194cf2008-07-17 04:54:10 -07001090 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 if (err) {
1092 if (q) {
David S. Miller53049972008-07-16 03:00:19 -07001093 root_lock = qdisc_root_lock(q);
1094 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 qdisc_destroy(q);
David S. Miller53049972008-07-16 03:00:19 -07001096 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 }
1098 return err;
1099 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 }
1101 return 0;
1102}
1103
1104static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001105 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106{
1107 struct tcmsg *tcm;
1108 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001109 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 struct gnet_dump d;
1111
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001112 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 tcm = NLMSG_DATA(nlh);
1114 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001115 tcm->tcm__pad1 = 0;
1116 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001117 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 tcm->tcm_parent = clid;
1119 tcm->tcm_handle = q->handle;
1120 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001121 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001123 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 q->qstats.qlen = q->q.qlen;
1125
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001126 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1127 goto nla_put_failure;
1128
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001130 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001131 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132
1133 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001134 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135
1136 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001139 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001140
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001142 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001143
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001144 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 return skb->len;
1146
1147nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001148nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001149 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 return -1;
1151}
1152
1153static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1154 u32 clid, struct Qdisc *old, struct Qdisc *new)
1155{
1156 struct sk_buff *skb;
1157 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1158
1159 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1160 if (!skb)
1161 return -ENOBUFS;
1162
1163 if (old && old->handle) {
1164 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1165 goto err_out;
1166 }
1167 if (new) {
1168 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1169 goto err_out;
1170 }
1171
1172 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001173 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
1175err_out:
1176 kfree_skb(skb);
1177 return -EINVAL;
1178}
1179
David S. Miller30723672008-07-18 22:50:15 -07001180static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1181{
1182 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1183}
1184
1185static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1186 struct netlink_callback *cb,
1187 int *q_idx_p, int s_q_idx)
1188{
1189 int ret = 0, q_idx = *q_idx_p;
1190 struct Qdisc *q;
1191
1192 if (!root)
1193 return 0;
1194
1195 q = root;
1196 if (q_idx < s_q_idx) {
1197 q_idx++;
1198 } else {
1199 if (!tc_qdisc_dump_ignore(q) &&
1200 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1201 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1202 goto done;
1203 q_idx++;
1204 }
1205 list_for_each_entry(q, &root->list, list) {
1206 if (q_idx < s_q_idx) {
1207 q_idx++;
1208 continue;
1209 }
1210 if (!tc_qdisc_dump_ignore(q) &&
1211 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1212 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1213 goto done;
1214 q_idx++;
1215 }
1216
1217out:
1218 *q_idx_p = q_idx;
1219 return ret;
1220done:
1221 ret = -1;
1222 goto out;
1223}
1224
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1226{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001227 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 int idx, q_idx;
1229 int s_idx, s_q_idx;
1230 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Denis V. Lunevb8542722007-12-01 00:21:31 +11001232 if (net != &init_net)
1233 return 0;
1234
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 s_idx = cb->args[0];
1236 s_q_idx = q_idx = cb->args[1];
1237 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07001238 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001239 for_each_netdev(&init_net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001240 struct netdev_queue *dev_queue;
1241
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001243 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 if (idx > s_idx)
1245 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001247
1248 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller827ebd62008-08-07 20:26:40 -07001249 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001250 goto done;
1251
1252 dev_queue = &dev->rx_queue;
David S. Miller827ebd62008-08-07 20:26:40 -07001253 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001254 goto done;
1255
Pavel Emelianov7562f872007-05-03 15:13:45 -07001256cont:
1257 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 }
1259
1260done:
1261 read_unlock(&dev_base_lock);
1262
1263 cb->args[0] = idx;
1264 cb->args[1] = q_idx;
1265
1266 return skb->len;
1267}
1268
1269
1270
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
1274
1275
1276
1277static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1278{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001279 struct net *net = sock_net(skb->sk);
David S. Millerb0e1e642008-07-08 17:42:10 -07001280 struct netdev_queue *dev_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001282 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283 struct net_device *dev;
1284 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001285 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 unsigned long cl = 0;
1287 unsigned long new_cl;
1288 u32 pid = tcm->tcm_parent;
1289 u32 clid = tcm->tcm_handle;
1290 u32 qid = TC_H_MAJ(clid);
1291 int err;
1292
Denis V. Lunevb8542722007-12-01 00:21:31 +11001293 if (net != &init_net)
1294 return -EINVAL;
1295
Eric W. Biederman881d9662007-09-17 11:56:21 -07001296 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 return -ENODEV;
1298
Patrick McHardy1e904742008-01-22 22:11:17 -08001299 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1300 if (err < 0)
1301 return err;
1302
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 /*
1304 parent == TC_H_UNSPEC - unspecified parent.
1305 parent == TC_H_ROOT - class is root, which has no parent.
1306 parent == X:0 - parent is root class.
1307 parent == X:Y - parent is a node in hierarchy.
1308 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1309
1310 handle == 0:0 - generate handle from kernel pool.
1311 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1312 handle == X:Y - clear.
1313 handle == X:0 - root class.
1314 */
1315
1316 /* Step 1. Determine qdisc handle X:0 */
1317
David S. Millere8a04642008-07-17 00:34:19 -07001318 dev_queue = netdev_get_tx_queue(dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 if (pid != TC_H_ROOT) {
1320 u32 qid1 = TC_H_MAJ(pid);
1321
1322 if (qid && qid1) {
1323 /* If both majors are known, they must be identical. */
1324 if (qid != qid1)
1325 return -EINVAL;
1326 } else if (qid1) {
1327 qid = qid1;
1328 } else if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001329 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 /* Now qid is genuine qdisc handle consistent
1332 both with parent and child.
1333
1334 TC_H_MAJ(pid) still may be unspecified, complete it now.
1335 */
1336 if (pid)
1337 pid = TC_H_MAKE(qid, pid);
1338 } else {
1339 if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001340 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 }
1342
1343 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001344 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 return -ENOENT;
1346
1347 /* An check that it supports classes */
1348 cops = q->ops->cl_ops;
1349 if (cops == NULL)
1350 return -EINVAL;
1351
1352 /* Now try to get class */
1353 if (clid == 0) {
1354 if (pid == TC_H_ROOT)
1355 clid = qid;
1356 } else
1357 clid = TC_H_MAKE(qid, clid);
1358
1359 if (clid)
1360 cl = cops->get(q, clid);
1361
1362 if (cl == 0) {
1363 err = -ENOENT;
1364 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1365 goto out;
1366 } else {
1367 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001368 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 err = -EEXIST;
1370 if (n->nlmsg_flags&NLM_F_EXCL)
1371 goto out;
1372 break;
1373 case RTM_DELTCLASS:
1374 err = cops->delete(q, cl);
1375 if (err == 0)
1376 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1377 goto out;
1378 case RTM_GETTCLASS:
1379 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1380 goto out;
1381 default:
1382 err = -EINVAL;
1383 goto out;
1384 }
1385 }
1386
1387 new_cl = cl;
1388 err = cops->change(q, clid, pid, tca, &new_cl);
1389 if (err == 0)
1390 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1391
1392out:
1393 if (cl)
1394 cops->put(q, cl);
1395
1396 return err;
1397}
1398
1399
1400static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1401 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001402 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403{
1404 struct tcmsg *tcm;
1405 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001406 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001408 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001410 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 tcm = NLMSG_DATA(nlh);
1412 tcm->tcm_family = AF_UNSPEC;
David S. Miller5ce2d482008-07-08 17:06:30 -07001413 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 tcm->tcm_parent = q->handle;
1415 tcm->tcm_handle = q->handle;
1416 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001417 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001419 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420
1421 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
David S. Miller7698b4f2008-07-16 01:42:40 -07001422 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001423 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424
1425 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001426 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001429 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001431 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 return skb->len;
1433
1434nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001435nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001436 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 return -1;
1438}
1439
1440static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1441 struct Qdisc *q, unsigned long cl, int event)
1442{
1443 struct sk_buff *skb;
1444 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1445
1446 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1447 if (!skb)
1448 return -ENOBUFS;
1449
1450 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1451 kfree_skb(skb);
1452 return -EINVAL;
1453 }
1454
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001455 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456}
1457
1458struct qdisc_dump_args
1459{
1460 struct qdisc_walker w;
1461 struct sk_buff *skb;
1462 struct netlink_callback *cb;
1463};
1464
1465static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1466{
1467 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1468
1469 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1470 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1471}
1472
David S. Miller30723672008-07-18 22:50:15 -07001473static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1474 struct tcmsg *tcm, struct netlink_callback *cb,
1475 int *t_p, int s_t)
1476{
1477 struct qdisc_dump_args arg;
1478
1479 if (tc_qdisc_dump_ignore(q) ||
1480 *t_p < s_t || !q->ops->cl_ops ||
1481 (tcm->tcm_parent &&
1482 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1483 (*t_p)++;
1484 return 0;
1485 }
1486 if (*t_p > s_t)
1487 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1488 arg.w.fn = qdisc_class_dump;
1489 arg.skb = skb;
1490 arg.cb = cb;
1491 arg.w.stop = 0;
1492 arg.w.skip = cb->args[1];
1493 arg.w.count = 0;
1494 q->ops->cl_ops->walk(q, &arg.w);
1495 cb->args[1] = arg.w.count;
1496 if (arg.w.stop)
1497 return -1;
1498 (*t_p)++;
1499 return 0;
1500}
1501
1502static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1503 struct tcmsg *tcm, struct netlink_callback *cb,
1504 int *t_p, int s_t)
1505{
1506 struct Qdisc *q;
1507
1508 if (!root)
1509 return 0;
1510
1511 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1512 return -1;
1513
1514 list_for_each_entry(q, &root->list, list) {
1515 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1516 return -1;
1517 }
1518
1519 return 0;
1520}
1521
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1523{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001525 struct net *net = sock_net(skb->sk);
1526 struct netdev_queue *dev_queue;
1527 struct net_device *dev;
1528 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529
Denis V. Lunevb8542722007-12-01 00:21:31 +11001530 if (net != &init_net)
1531 return 0;
1532
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1534 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001535 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 return 0;
1537
1538 s_t = cb->args[0];
1539 t = 0;
1540
David S. Miller30723672008-07-18 22:50:15 -07001541 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller8123b422008-08-08 23:23:39 -07001542 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001543 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544
David S. Miller30723672008-07-18 22:50:15 -07001545 dev_queue = &dev->rx_queue;
David S. Miller8123b422008-08-08 23:23:39 -07001546 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001547 goto done;
1548
1549done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 cb->args[0] = t;
1551
1552 dev_put(dev);
1553 return skb->len;
1554}
1555
1556/* Main classifier routine: scans classifier chain attached
1557 to this qdisc, (optionally) tests for protocol and asks
1558 specific classifiers.
1559 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001560int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1561 struct tcf_result *res)
1562{
1563 __be16 protocol = skb->protocol;
1564 int err = 0;
1565
1566 for (; tp; tp = tp->next) {
1567 if ((tp->protocol == protocol ||
1568 tp->protocol == htons(ETH_P_ALL)) &&
1569 (err = tp->classify(skb, tp, res)) >= 0) {
1570#ifdef CONFIG_NET_CLS_ACT
1571 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1572 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1573#endif
1574 return err;
1575 }
1576 }
1577 return -1;
1578}
1579EXPORT_SYMBOL(tc_classify_compat);
1580
/*
 * Classify @skb against the chain starting at @tp, storing the result
 * in @res.
 *
 * Without CONFIG_NET_CLS_ACT this is just tc_classify_compat().  With
 * it, a TC_ACT_RECLASSIFY verdict restarts classification from the head
 * of the chain.  The restart count is kept in skb->tc_verd; once it
 * reaches MAX_REC_LOOP the packet is given TC_ACT_SHOT and a
 * rate-limited notice is logged.
 *
 * Returns the classifier verdict, or -1 when nothing matched.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/*
			 * This runs per packet: rate-limit the message so a
			 * misconfigured rule cannot flood the log.
			 */
			if (net_ratelimit())
				printk(KERN_NOTICE
				       "rule prio %u protocol %02x reclassify loop, "
				       "packet dropped\n",
				       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611
Patrick McHardya48b5a62007-03-23 11:29:43 -07001612void tcf_destroy(struct tcf_proto *tp)
1613{
1614 tp->ops->destroy(tp);
1615 module_put(tp->ops->owner);
1616 kfree(tp);
1617}
1618
Patrick McHardyff31ab52008-07-01 19:52:38 -07001619void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001620{
1621 struct tcf_proto *tp;
1622
Patrick McHardyff31ab52008-07-01 19:52:38 -07001623 while ((tp = *fl) != NULL) {
1624 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001625 tcf_destroy(tp);
1626 }
1627}
1628EXPORT_SYMBOL(tcf_destroy_chain);
1629
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630#ifdef CONFIG_PROC_FS
1631static int psched_show(struct seq_file *seq, void *v)
1632{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001633 struct timespec ts;
1634
1635 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 seq_printf(seq, "%08x %08x %08x %08x\n",
Patrick McHardy641b9e02007-03-16 01:18:42 -07001637 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001638 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001639 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640
1641 return 0;
1642}
1643
1644static int psched_open(struct inode *inode, struct file *file)
1645{
1646 return single_open(file, psched_show, PDE(inode)->data);
1647}
1648
Arjan van de Venda7071d2007-02-12 00:55:36 -08001649static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 .owner = THIS_MODULE,
1651 .open = psched_open,
1652 .read = seq_read,
1653 .llseek = seq_lseek,
1654 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001655};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656#endif
1657
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658static int __init pktsched_init(void)
1659{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 register_qdisc(&pfifo_qdisc_ops);
1661 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001662 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663
Thomas Grafbe577dd2007-03-22 11:55:50 -07001664 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1665 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1666 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1667 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1668 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1669 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1670
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 return 0;
1672}
1673
1674subsys_initcall(pktsched_init);