blob: 6ab4a2f92ca0b390849547e8e93f99b07e6d4a6b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020032#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110033#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070034#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <net/pkt_sched.h>
36
Linus Torvalds1da177e2005-04-16 15:20:36 -070037static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
42/*
43
44 Short review.
45 -------------
46
47 This file consists of two interrelated parts:
48
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
51
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
56
57 qdisc's are divided to two categories:
58 - "queues", which have no internal structure visible from outside.
59 - "schedulers", which split all the packets to "traffic classes",
60 using "packet classifiers" (look at cls_api.c)
61
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
64
65 The goal of the routines in this file is to translate
66 information supplied by user in the form of handles
67 to more intelligible for kernel form, to make some sanity
68 checks and part of work, which is common to all qdiscs
69 and to provide rtnetlink notifications.
70
71 All real intelligent work is done inside qdisc modules.
72
73
74
75 Every discipline has two major routines: enqueue and dequeue.
76
77 ---dequeue
78
79 dequeue usually returns a skb to send. It is allowed to return NULL,
80 but it does not mean that queue is empty, it just means that
81 discipline does not want to send anything this time.
82 Queue is really empty if q->q.qlen == 0.
83 For complicated disciplines with multiple queues q->q is not
84 real packet queue, but however q->q.qlen must be valid.
85
86 ---enqueue
87
88 enqueue returns 0, if packet was enqueued successfully.
89 If packet (this one or another one) was dropped, it returns
90 not zero error code.
91 NET_XMIT_DROP - this packet dropped
92 Expected action: do not backoff, but wait until queue will clear.
93 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
94 Expected action: backoff or ignore
95 NET_XMIT_POLICED - dropped by police.
96 Expected action: backoff or error to real-time apps.
97
98 Auxiliary routines:
99
100 ---requeue
101
102 requeues once dequeued packet. It is used for non-standard or
David S. Millere65d22e2008-07-08 16:46:01 -0700103 just buggy devices, which can defer output even if netif_queue_stopped()=0.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
105 ---reset
106
107 returns qdisc to initial state: purge all buffers, clear all
108 timers, counters (except for statistics) etc.
109
110 ---init
111
112 initializes newly created qdisc.
113
114 ---destroy
115
116 destroys resources allocated by init and during lifetime of qdisc.
117
118 ---change
119
120 changes qdisc parameters.
121 */
122
123/* Protects list of registered TC modules. It is pure SMP lock. */
124static DEFINE_RWLOCK(qdisc_mod_lock);
125
126
127/************************************************
128 * Queueing disciplines manipulation. *
129 ************************************************/
130
131
132/* The list of all installed queueing disciplines. */
133
134static struct Qdisc_ops *qdisc_base;
135
/* Register/unregister queueing discipline */
137
138int register_qdisc(struct Qdisc_ops *qops)
139{
140 struct Qdisc_ops *q, **qp;
141 int rc = -EEXIST;
142
143 write_lock(&qdisc_mod_lock);
144 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
145 if (!strcmp(qops->id, q->id))
146 goto out;
147
148 if (qops->enqueue == NULL)
149 qops->enqueue = noop_qdisc_ops.enqueue;
150 if (qops->requeue == NULL)
151 qops->requeue = noop_qdisc_ops.requeue;
152 if (qops->dequeue == NULL)
153 qops->dequeue = noop_qdisc_ops.dequeue;
154
155 qops->next = NULL;
156 *qp = qops;
157 rc = 0;
158out:
159 write_unlock(&qdisc_mod_lock);
160 return rc;
161}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800162EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
164int unregister_qdisc(struct Qdisc_ops *qops)
165{
166 struct Qdisc_ops *q, **qp;
167 int err = -ENOENT;
168
169 write_lock(&qdisc_mod_lock);
170 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
171 if (q == qops)
172 break;
173 if (q) {
174 *qp = q->next;
175 q->next = NULL;
176 err = 0;
177 }
178 write_unlock(&qdisc_mod_lock);
179 return err;
180}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800181EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183/* We know handle. Find qdisc among all qdisc's attached to device
184 (root qdisc, all its children, children of children etc.)
185 */
186
David S. Miller8123b422008-08-08 23:23:39 -0700187struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
188{
189 struct Qdisc *q;
190
191 if (!(root->flags & TCQ_F_BUILTIN) &&
192 root->handle == handle)
193 return root;
194
195 list_for_each_entry(q, &root->list, list) {
196 if (q->handle == handle)
197 return q;
198 }
199 return NULL;
200}
201
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700202/*
203 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
204 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
205 */
206static DEFINE_SPINLOCK(qdisc_list_lock);
207
208static void qdisc_list_add(struct Qdisc *q)
209{
210 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
211 spin_lock_bh(&qdisc_list_lock);
212 list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
213 spin_unlock_bh(&qdisc_list_lock);
214 }
215}
216
217void qdisc_list_del(struct Qdisc *q)
218{
219 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
220 spin_lock_bh(&qdisc_list_lock);
221 list_del(&q->list);
222 spin_unlock_bh(&qdisc_list_lock);
223 }
224}
225EXPORT_SYMBOL(qdisc_list_del);
226
/* Find the qdisc with handle @handle anywhere on @dev: searches the
 * sleeping root tree of every TX queue, then the ingress qdisc on the
 * RX queue.  Returns NULL if not found.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;
	struct Qdisc *q;

	/* Keep the per-root qdisc lists stable while we walk them; see
	 * the comment above qdisc_list_lock.
	 */
	spin_lock_bh(&qdisc_list_lock);

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			goto unlock;
	}

	/* Not on any TX queue: try the ingress qdisc. */
	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);

unlock:
	spin_unlock_bh(&qdisc_list_lock);

	return q;
}
250
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
252{
253 unsigned long cl;
254 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800255 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257 if (cops == NULL)
258 return NULL;
259 cl = cops->get(p, classid);
260
261 if (cl == 0)
262 return NULL;
263 leaf = cops->leaf(p, cl);
264 cops->put(p, cl);
265 return leaf;
266}
267
268/* Find queueing discipline by name */
269
Patrick McHardy1e904742008-01-22 22:11:17 -0800270static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271{
272 struct Qdisc_ops *q = NULL;
273
274 if (kind) {
275 read_lock(&qdisc_mod_lock);
276 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800277 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 if (!try_module_get(q->owner))
279 q = NULL;
280 break;
281 }
282 }
283 read_unlock(&qdisc_mod_lock);
284 }
285 return q;
286}
287
288static struct qdisc_rate_table *qdisc_rtab_list;
289
Patrick McHardy1e904742008-01-22 22:11:17 -0800290struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291{
292 struct qdisc_rate_table *rtab;
293
294 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
295 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
296 rtab->refcnt++;
297 return rtab;
298 }
299 }
300
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800301 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
302 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 return NULL;
304
305 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
306 if (rtab) {
307 rtab->rate = *r;
308 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800309 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 rtab->next = qdisc_rtab_list;
311 qdisc_rtab_list = rtab;
312 }
313 return rtab;
314}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800315EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
317void qdisc_put_rtab(struct qdisc_rate_table *tab)
318{
319 struct qdisc_rate_table *rtab, **rtabp;
320
321 if (!tab || --tab->refcnt)
322 return;
323
324 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
325 if (rtab == tab) {
326 *rtabp = rtab->next;
327 kfree(rtab);
328 return;
329 }
330 }
331}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800332EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700334static LIST_HEAD(qdisc_stab_list);
335static DEFINE_SPINLOCK(qdisc_stab_lock);
336
337static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
338 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
339 [TCA_STAB_DATA] = { .type = NLA_BINARY },
340};
341
/* Look up or create a size table from the TCA_STAB netlink attribute.
 * Size tables map real packet lengths onto the lengths used for
 * scheduling.  Tables are shared: an existing one with identical
 * parameters and data just gets its refcount bumped.
 * Returns an ERR_PTR() on failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		/* A non-empty table requires the actual slot data. */
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* Reject an inconsistent size spec vs. supplied data length. */
	if (!s || tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Share an existing table with identical spec and data. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	/* No match: allocate outside the spinlock (GFP_KERNEL may sleep).
	 * NOTE(review): dropping the lock means two concurrent callers can
	 * each create an identical table — presumably acceptable since
	 * sharing is only opportunistic; confirm.
	 */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
398
399void qdisc_put_stab(struct qdisc_size_table *tab)
400{
401 if (!tab)
402 return;
403
David S. Millerf3b96052008-08-18 22:33:05 -0700404 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700405
406 if (--tab->refcnt == 0) {
407 list_del(&tab->list);
408 kfree(tab);
409 }
410
David S. Millerf3b96052008-08-18 22:33:05 -0700411 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700412}
413EXPORT_SYMBOL(qdisc_put_stab);
414
/* Emit a TCA_STAB nested attribute describing @stab into @skb.  Only
 * the sizespec is dumped, not the table data itself.
 * Returns skb->len on success, -1 if the skb ran out of room.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	/* NLA_PUT jumps to nla_put_failure when the skb is full. */
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
430
/* Compute the scheduling length of @skb according to size table @stab
 * and store it in qdisc_skb_cb(skb)->pkt_len.
 *
 * The slot index is the real length plus overhead and cell alignment,
 * shifted down by cell_log.  Indices past the table are extrapolated
 * from the last entry.  The stored length is never less than 1.
 */
void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	/* An empty table means only the overhead adjustment applies. */
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Out of range: extrapolate linearly from the table tail. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);
458
/* hrtimer callback: the throttled qdisc may transmit again, so clear
 * the flag and reschedule its root qdisc.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	/* Make the flag clear visible before the reschedule. */
	smp_wmb();
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}
470
/* Set up @wd to throttle @qdisc via an absolute monotonic hrtimer. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
478
479void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
480{
481 ktime_t time;
482
Jarek Poplawski2540e052008-08-21 05:11:14 -0700483 if (test_bit(__QDISC_STATE_DEACTIVATED,
484 &qdisc_root_sleeping(wd->qdisc)->state))
485 return;
486
Patrick McHardy41794772007-03-16 01:19:15 -0700487 wd->qdisc->flags |= TCQ_F_THROTTLED;
488 time = ktime_set(0, 0);
489 time = ktime_add_ns(time, PSCHED_US2NS(expires));
490 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
491}
492EXPORT_SYMBOL(qdisc_watchdog_schedule);
493
/* Stop a pending watchdog timer and unthrottle its qdisc. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
Adrian Bunka94f7792008-07-22 14:20:11 -0700501static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700502{
503 unsigned int size = n * sizeof(struct hlist_head), i;
504 struct hlist_head *h;
505
506 if (size <= PAGE_SIZE)
507 h = kmalloc(size, GFP_KERNEL);
508 else
509 h = (struct hlist_head *)
510 __get_free_pages(GFP_KERNEL, get_order(size));
511
512 if (h != NULL) {
513 for (i = 0; i < n; i++)
514 INIT_HLIST_HEAD(&h[i]);
515 }
516 return h;
517}
518
519static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
520{
521 unsigned int size = n * sizeof(struct hlist_head);
522
523 if (size <= PAGE_SIZE)
524 kfree(h);
525 else
526 free_pages((unsigned long)h, get_order(size));
527}
528
/* Double the class hash table of @clhash once its load factor exceeds
 * 0.75.  Rehashing happens under the qdisc tree lock; the old table is
 * freed after the switch.  Allocation failure just leaves the old
 * (still functional) table in place.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	/* Move every class to its bucket in the new table. */
	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
564
565int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
566{
567 unsigned int size = 4;
568
569 clhash->hash = qdisc_class_hash_alloc(size);
570 if (clhash->hash == NULL)
571 return -ENOMEM;
572 clhash->hashsize = size;
573 clhash->hashmask = size - 1;
574 clhash->hashelems = 0;
575 return 0;
576}
577EXPORT_SYMBOL(qdisc_class_hash_init);
578
/* Free the bucket array of @clhash; callers must have emptied it. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
584
585void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
586 struct Qdisc_class_common *cl)
587{
588 unsigned int h;
589
590 INIT_HLIST_NODE(&cl->hnode);
591 h = qdisc_class_hash(cl->classid, clhash->hashmask);
592 hlist_add_head(&cl->hnode, &clhash->hash[h]);
593 clhash->hashelems++;
594}
595EXPORT_SYMBOL(qdisc_class_hash_insert);
596
597void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
598 struct Qdisc_class_common *cl)
599{
600 hlist_del(&cl->hnode);
601 clhash->hashelems--;
602}
603EXPORT_SYMBOL(qdisc_class_hash_remove);
604
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605/* Allocate an unique handle from space managed by kernel */
606
607static u32 qdisc_alloc_handle(struct net_device *dev)
608{
609 int i = 0x10000;
610 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
611
612 do {
613 autohandle += TC_H_MAKE(0x10000U, 0);
614 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
615 autohandle = TC_H_MAKE(0x80000000U, 0);
616 } while (qdisc_lookup(dev, autohandle) && --i > 0);
617
618 return i>0 ? autohandle : 0;
619}
620
/* Attach toplevel qdisc to device queue.  Returns the previously
 * attached (sleeping) qdisc; the caller owns that reference.
 */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	/* Park the active qdisc on noop; the new sleeping qdisc is
	 * presumably installed later by dev_activate() — see callers.
	 */
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
646
/* Propagate a queue-length decrease of @n packets from @sch up through
 * its ancestors, giving each classful parent a chance to deactivate
 * now-empty classes via ->qlen_notify().
 */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		/* Ingress qdiscs have no ancestry worth walking. */
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no parent. */
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
David S. Miller99194cf2008-07-17 04:54:10 -0700675static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
676 struct Qdisc *old, struct Qdisc *new)
677{
678 if (new || old)
679 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
David S. Miller4d8863a2008-08-18 21:03:15 -0700681 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700682 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700683}
684
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		/* Device-root graft: replace the root qdisc of every TX
		 * queue, or the single RX queue for ingress.
		 */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		/* Quiesce the device while the roots are swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* Each additional queue sharing "new" holds its
			 * own reference.
			 */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}
747
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700748/* lockdep annotation is needed for ingress; egress gets it only for name */
749static struct lock_class_key qdisc_tx_lock;
750static struct lock_class_key qdisc_rx_lock;
751
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Looks up the ops by TCA_KIND (loading the module if necessary),
   allocates the qdisc, assigns/allocates a handle, runs ->init(),
   attaches an optional size table and rate estimator, and links the
   qdisc into its root's list.  On failure *errp is set and NULL
   returned.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		/* Ingress locks get their own lockdep class. */
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			/* Non-root egress qdiscs hang the estimator off the
			 * root's sleeping lock; root/ingress use their own.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	/* NOTE(review): presumably balances a dev_hold() taken inside
	 * qdisc_alloc() — confirm against sch_generic.c.
	 */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
871
/* Apply TCA_OPTIONS, TCA_STAB and TCA_RATE changes to an existing
 * qdisc.  Returns 0 or a negative errno; when ->change() fails no
 * other attribute is touched.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Swap in the new size table; note that a change request without
	 * TCA_STAB drops any previously attached table (stab is NULL).
	 */
	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	return 0;
}
900
/*
 * Walker state for check_loop()/check_loop_fn().  The walker callback
 * receives a struct qdisc_walker * and casts it back to this type, so
 * the embedded walker MUST remain the first member.
 */
struct check_loop_arg
{
	struct qdisc_walker 	w;	/* generic class-walk state; must be first */
	struct Qdisc		*p;	/* candidate new parent we must not reach */
	int			depth;	/* current recursion depth */
};
907
908static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
909
910static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
911{
912 struct check_loop_arg arg;
913
914 if (q->ops->cl_ops == NULL)
915 return 0;
916
917 arg.w.stop = arg.w.skip = arg.w.count = 0;
918 arg.w.fn = check_loop_fn;
919 arg.depth = depth;
920 arg.p = p;
921 q->ops->cl_ops->walk(q, &arg.w);
922 return arg.w.stop ? -ELOOP : 0;
923}
924
925static int
926check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
927{
928 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800929 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 struct check_loop_arg *arg = (struct check_loop_arg *)w;
931
932 leaf = cops->leaf(q, cl);
933 if (leaf) {
934 if (leaf == arg->p || arg->depth > 7)
935 return -ELOOP;
936 return check_loop(leaf, arg->p, arg->depth + 1);
937 }
938 return 0;
939}
940
941/*
942 * Delete/get qdisc.
943 */
944
/*
 * Handler for RTM_DELQDISC and RTM_GETQDISC netlink requests.
 *
 * Locates the target qdisc either via its parent (tcm_parent, which
 * may be TC_H_ROOT or the ingress pseudo-parent) or directly by
 * handle (tcm_handle), then deletes it (graft NULL in its place) or
 * just notifies the requester about it.
 *
 * Returns 0 on success or a negative errno.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;	/* parent qdisc, when looked up via clid */
	int err;

	/* Traffic control is only wired up for the initial namespace here. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		/* Resolve the qdisc through its parent handle. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must match what we found. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent given: look the qdisc up directly by handle. */
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	/* Optional kind check: requester may pin the expected qdisc type. */
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		/* Deleting requires an explicit parent and a real qdisc. */
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL in place of q removes and notifies. */
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
1005
1006/*
1007 Create/change qdisc.
1008 */
1009
/*
 * Handler for RTM_NEWQDISC: create a new qdisc, replace an existing
 * one, or change its parameters, depending on the netlink flags
 * (NLM_F_CREATE / NLM_F_REPLACE / NLM_F_EXCL) and on whether a qdisc
 * with the requested handle already exists.
 *
 * May restart from "replay" when qdisc_create() returns -EAGAIN
 * (e.g. after a module load request).  Returns 0 or a negative errno.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	/* Traffic control is only wired up for the initial namespace here. */
	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		/* Locate the current child qdisc under the given parent. */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* Explicit handle given: replace or create. */
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Re-grafting an existing qdisc must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		/* No parent: the request addresses a qdisc by handle only. */
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	/* Ingress qdiscs hang off the rx queue; egress off tx queue 0. */
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		/* -EAGAIN means a module was requested: retry the lookup. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1148
/*
 * Append one netlink message describing qdisc @q (tcmsg header, kind,
 * scheduler options, size table and statistics) to @skb.
 *
 * Returns the new skb length on success, -1 if the skb ran out of
 * room (the partial message is trimmed off again).
 *
 * Note: NLMSG_NEW() jumps to the nlmsg_failure label on overflow, and
 * NLA_PUT_STRING() to nla_put_failure — both labels are required even
 * though no explicit goto to nlmsg_failure appears below.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	/* Refresh the queue-length snapshot before dumping stats. */
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1197
1198static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1199 u32 clid, struct Qdisc *old, struct Qdisc *new)
1200{
1201 struct sk_buff *skb;
1202 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1203
1204 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1205 if (!skb)
1206 return -ENOBUFS;
1207
1208 if (old && old->handle) {
1209 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1210 goto err_out;
1211 }
1212 if (new) {
1213 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1214 goto err_out;
1215 }
1216
1217 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001218 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219
1220err_out:
1221 kfree_skb(skb);
1222 return -EINVAL;
1223}
1224
David S. Miller30723672008-07-18 22:50:15 -07001225static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1226{
1227 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1228}
1229
/*
 * Dump @root and every qdisc on its ->list into @skb, resuming at
 * index @s_q_idx (netlink dump cursor).  *q_idx_p is advanced past
 * each qdisc considered so the caller can store it back into
 * cb->args[] for the next dump round.
 *
 * Returns 0 when done, -1 when the skb filled up mid-dump.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself is entry 0 of this dump sequence. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;	/* already dumped in a previous round */
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	/* Then every other qdisc linked under this root. */
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1269
/*
 * Netlink dump handler for RTM_GETQDISC: walk every device in the
 * initial namespace and dump its egress (tx queue 0) and ingress
 * qdisc trees.  cb->args[0]/args[1] hold the device and qdisc
 * cursors between successive dump calls.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;	/* resume cursors from the previous round */
	struct net_device *dev;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);	/* protects the device list walk */
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;	/* skip devices already dumped */
		if (idx > s_idx)
			s_q_idx = 0;	/* fresh device: dump all its qdiscs */
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	/* Save cursors for the next invocation of this dump. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1313
1314
1315
1316/************************************************
1317 * Traffic classes manipulation. *
1318 ************************************************/
1319
1320
1321
/*
 * Handler for RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS: resolve
 * the owning qdisc from the (possibly partial) parent/handle pair,
 * then create, change, delete or report the class via the qdisc's
 * class operations.  Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;	/* opaque class ref from cops->get() */
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	/* Traffic control is only wired up for the initial namespace here. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Unknown class: only NEWTCLASS with CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			/* EXCL forbids touching an existing class. */
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create a new class or change the existing one. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the reference taken by cops->get() above. */
	if (cl)
		cops->put(q, cl);

	return err;
}
1443
1444
/*
 * Append one netlink message describing class @cl of qdisc @q to
 * @skb: tcmsg header, qdisc kind, class-specific dump and statistics.
 *
 * Returns the new skb length on success, -1 on overflow (the partial
 * message is trimmed).  As in tc_fill_qdisc(), NLMSG_NEW() and
 * NLA_PUT_STRING() jump to the failure labels below on overflow.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1484
1485static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1486 struct Qdisc *q, unsigned long cl, int event)
1487{
1488 struct sk_buff *skb;
1489 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1490
1491 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1492 if (!skb)
1493 return -ENOBUFS;
1494
1495 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1496 kfree_skb(skb);
1497 return -EINVAL;
1498 }
1499
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001500 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501}
1502
/*
 * Walker state for class dumping: qdisc_class_dump() casts the
 * struct qdisc_walker * back to this type, so the embedded walker
 * MUST remain the first member.
 */
struct qdisc_dump_args
{
	struct qdisc_walker w;		/* generic class-walk state; must be first */
	struct sk_buff *skb;		/* dump target */
	struct netlink_callback *cb;	/* netlink dump context (seq, pid) */
};
1509
1510static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1511{
1512 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1513
1514 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1515 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1516}
1517
/*
 * Dump all classes of one qdisc @q, resuming at qdisc index @s_t.
 * *t_p counts qdiscs considered so far; cb->args[1] carries the
 * within-qdisc class cursor between dump rounds.
 *
 * Returns 0 when this qdisc is fully dumped (or skipped), -1 when
 * the skb filled up mid-walk.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip: built-in qdiscs, already-dumped qdiscs, classless qdiscs,
	 * and qdiscs not matching an explicit tcm_parent filter. */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Moving past the resume point: reset the per-qdisc class cursor. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* resume inside this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* save class cursor for next round */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1546
1547static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1548 struct tcmsg *tcm, struct netlink_callback *cb,
1549 int *t_p, int s_t)
1550{
1551 struct Qdisc *q;
1552
1553 if (!root)
1554 return 0;
1555
1556 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1557 return -1;
1558
1559 list_for_each_entry(q, &root->list, list) {
1560 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1561 return -1;
1562 }
1563
1564 return 0;
1565}
1566
/*
 * Netlink dump handler for RTM_GETTCLASS: dump the classes of the
 * egress (tx queue 0) and ingress qdisc trees of one device.
 * cb->args[0] is the qdisc cursor across dump rounds.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	/* Takes a device reference; released via dev_put() below. */
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];	/* resume cursor from the previous round */
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;	/* save cursor for the next invocation */

	dev_put(dev);
	return skb->len;
}
1600
1601/* Main classifier routine: scans classifier chain attached
1602 to this qdisc, (optionally) tests for protocol and asks
1603 specific classifiers.
1604 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001605int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1606 struct tcf_result *res)
1607{
1608 __be16 protocol = skb->protocol;
1609 int err = 0;
1610
1611 for (; tp; tp = tp->next) {
1612 if ((tp->protocol == protocol ||
1613 tp->protocol == htons(ETH_P_ALL)) &&
1614 (err = tp->classify(skb, tp, res)) >= 0) {
1615#ifdef CONFIG_NET_CLS_ACT
1616 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1617 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1618#endif
1619 return err;
1620 }
1621 }
1622 return -1;
1623}
1624EXPORT_SYMBOL(tc_classify_compat);
1625
/*
 * Like tc_classify_compat(), but honours TC_ACT_RECLASSIFY verdicts:
 * restart classification from the head of the chain, bounded by
 * MAX_REC_LOOP iterations (counted in skb->tc_verd) to prevent
 * filter-induced infinite loops; on overrun the packet is shot.
 *
 * Fix: removed the local `protocol` variable — it was assigned from
 * skb->protocol on every (re)classification pass but never read
 * (tc_classify_compat() reads skb->protocol itself).
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* head of chain, for reclassify */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		/* Too many reclassify rounds: drop to avoid looping. */
		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656
Patrick McHardya48b5a62007-03-23 11:29:43 -07001657void tcf_destroy(struct tcf_proto *tp)
1658{
1659 tp->ops->destroy(tp);
1660 module_put(tp->ops->owner);
1661 kfree(tp);
1662}
1663
Patrick McHardyff31ab52008-07-01 19:52:38 -07001664void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001665{
1666 struct tcf_proto *tp;
1667
Patrick McHardyff31ab52008-07-01 19:52:38 -07001668 while ((tp = *fl) != NULL) {
1669 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001670 tcf_destroy(tp);
1671 }
1672}
1673EXPORT_SYMBOL(tcf_destroy_chain);
1674
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675#ifdef CONFIG_PROC_FS
/*
 * /proc/net/psched contents: four hex words describing the packet
 * scheduler clock — ns per psched tick, psched ticks per us (via
 * PSCHED_US2NS), a fixed 1000000, and the hrtimer resolution in Hz.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}
1688
/* Open hook for /proc/net/psched: single-shot seq_file of psched_show(). */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}
1693
/* File operations for /proc/net/psched (read-only seq_file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701#endif
1702
/*
 * Packet scheduler subsystem init: register the built-in fifo qdiscs,
 * create /proc/net/psched, and hook the qdisc/class rtnetlink message
 * types to their handlers.
 *
 * NOTE(review): the register_qdisc() and proc_net_fops_create()
 * return values are ignored here — failures would leave the fifo
 * qdiscs or the proc entry missing without any diagnostic; consider
 * checking them.
 */
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	/* Qdisc and class add/delete/get (and dump) netlink handlers. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);