blob: 5bcef13408c80c435a4c22f568cf87c22453f142 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020032#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110033#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070034#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <net/pkt_sched.h>
36
Linus Torvalds1da177e2005-04-16 15:20:36 -070037static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
42/*
43
44 Short review.
45 -------------
46
47 This file consists of two interrelated parts:
48
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
51
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
56
57 qdisc's are divided to two categories:
58 - "queues", which have no internal structure visible from outside.
59 - "schedulers", which split all the packets to "traffic classes",
60 using "packet classifiers" (look at cls_api.c)
61
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
64
65 The goal of the routines in this file is to translate
66 information supplied by user in the form of handles
67 to more intelligible for kernel form, to make some sanity
68 checks and part of work, which is common to all qdiscs
69 and to provide rtnetlink notifications.
70
71 All real intelligent work is done inside qdisc modules.
72
73
74
75 Every discipline has two major routines: enqueue and dequeue.
76
77 ---dequeue
78
79 dequeue usually returns a skb to send. It is allowed to return NULL,
80 but it does not mean that queue is empty, it just means that
81 discipline does not want to send anything this time.
82 Queue is really empty if q->q.qlen == 0.
83 For complicated disciplines with multiple queues q->q is not
84 real packet queue, but however q->q.qlen must be valid.
85
86 ---enqueue
87
88 enqueue returns 0, if packet was enqueued successfully.
89 If packet (this one or another one) was dropped, it returns
90 not zero error code.
91 NET_XMIT_DROP - this packet dropped
92 Expected action: do not backoff, but wait until queue will clear.
93 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
94 Expected action: backoff or ignore
95 NET_XMIT_POLICED - dropped by police.
96 Expected action: backoff or error to real-time apps.
97
98 Auxiliary routines:
99
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700100 ---peek
101
102 like dequeue but without removing a packet from the queue
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
135/* Register/uregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700149 if (qops->peek == NULL) {
150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
155 }
156 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
159
160 qops->next = NULL;
161 *qp = qops;
162 rc = 0;
163out:
164 write_unlock(&qdisc_mod_lock);
165 return rc;
166}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800167EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
169int unregister_qdisc(struct Qdisc_ops *qops)
170{
171 struct Qdisc_ops *q, **qp;
172 int err = -ENOENT;
173
174 write_lock(&qdisc_mod_lock);
175 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
176 if (q == qops)
177 break;
178 if (q) {
179 *qp = q->next;
180 q->next = NULL;
181 err = 0;
182 }
183 write_unlock(&qdisc_mod_lock);
184 return err;
185}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800186EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
188/* We know handle. Find qdisc among all qdisc's attached to device
189 (root qdisc, all its children, children of children etc.)
190 */
191
David S. Miller8123b422008-08-08 23:23:39 -0700192struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
193{
194 struct Qdisc *q;
195
196 if (!(root->flags & TCQ_F_BUILTIN) &&
197 root->handle == handle)
198 return root;
199
200 list_for_each_entry(q, &root->list, list) {
201 if (q->handle == handle)
202 return q;
203 }
204 return NULL;
205}
206
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700207/*
208 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
209 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
210 */
211static DEFINE_SPINLOCK(qdisc_list_lock);
212
213static void qdisc_list_add(struct Qdisc *q)
214{
215 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
216 spin_lock_bh(&qdisc_list_lock);
217 list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
218 spin_unlock_bh(&qdisc_list_lock);
219 }
220}
221
222void qdisc_list_del(struct Qdisc *q)
223{
224 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
225 spin_lock_bh(&qdisc_list_lock);
226 list_del(&q->list);
227 spin_unlock_bh(&qdisc_list_lock);
228 }
229}
230EXPORT_SYMBOL(qdisc_list_del);
231
David S. Milleread81cc2008-07-17 00:50:32 -0700232struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800233{
David S. Miller30723672008-07-18 22:50:15 -0700234 unsigned int i;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700235 struct Qdisc *q;
236
237 spin_lock_bh(&qdisc_list_lock);
Patrick McHardy43effa12006-11-29 17:35:48 -0800238
David S. Miller30723672008-07-18 22:50:15 -0700239 for (i = 0; i < dev->num_tx_queues; i++) {
240 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700241 struct Qdisc *txq_root = txq->qdisc_sleeping;
David S. Miller30723672008-07-18 22:50:15 -0700242
David S. Miller8123b422008-08-08 23:23:39 -0700243 q = qdisc_match_from_root(txq_root, handle);
244 if (q)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700245 goto unlock;
Patrick McHardy43effa12006-11-29 17:35:48 -0800246 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700247
248 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
249
250unlock:
251 spin_unlock_bh(&qdisc_list_lock);
252
253 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800254}
255
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
257{
258 unsigned long cl;
259 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800260 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 if (cops == NULL)
263 return NULL;
264 cl = cops->get(p, classid);
265
266 if (cl == 0)
267 return NULL;
268 leaf = cops->leaf(p, cl);
269 cops->put(p, cl);
270 return leaf;
271}
272
273/* Find queueing discipline by name */
274
Patrick McHardy1e904742008-01-22 22:11:17 -0800275static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276{
277 struct Qdisc_ops *q = NULL;
278
279 if (kind) {
280 read_lock(&qdisc_mod_lock);
281 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800282 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 if (!try_module_get(q->owner))
284 q = NULL;
285 break;
286 }
287 }
288 read_unlock(&qdisc_mod_lock);
289 }
290 return q;
291}
292
293static struct qdisc_rate_table *qdisc_rtab_list;
294
Patrick McHardy1e904742008-01-22 22:11:17 -0800295struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296{
297 struct qdisc_rate_table *rtab;
298
299 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
300 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
301 rtab->refcnt++;
302 return rtab;
303 }
304 }
305
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800306 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
307 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 return NULL;
309
310 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
311 if (rtab) {
312 rtab->rate = *r;
313 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800314 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 rtab->next = qdisc_rtab_list;
316 qdisc_rtab_list = rtab;
317 }
318 return rtab;
319}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800320EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
322void qdisc_put_rtab(struct qdisc_rate_table *tab)
323{
324 struct qdisc_rate_table *rtab, **rtabp;
325
326 if (!tab || --tab->refcnt)
327 return;
328
329 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
330 if (rtab == tab) {
331 *rtabp = rtab->next;
332 kfree(rtab);
333 return;
334 }
335 }
336}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800337EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700339static LIST_HEAD(qdisc_stab_list);
340static DEFINE_SPINLOCK(qdisc_stab_lock);
341
342static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
343 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
344 [TCA_STAB_DATA] = { .type = NLA_BINARY },
345};
346
347static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
348{
349 struct nlattr *tb[TCA_STAB_MAX + 1];
350 struct qdisc_size_table *stab;
351 struct tc_sizespec *s;
352 unsigned int tsize = 0;
353 u16 *tab = NULL;
354 int err;
355
356 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
357 if (err < 0)
358 return ERR_PTR(err);
359 if (!tb[TCA_STAB_BASE])
360 return ERR_PTR(-EINVAL);
361
362 s = nla_data(tb[TCA_STAB_BASE]);
363
364 if (s->tsize > 0) {
365 if (!tb[TCA_STAB_DATA])
366 return ERR_PTR(-EINVAL);
367 tab = nla_data(tb[TCA_STAB_DATA]);
368 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
369 }
370
371 if (!s || tsize != s->tsize || (!tab && tsize > 0))
372 return ERR_PTR(-EINVAL);
373
David S. Millerf3b96052008-08-18 22:33:05 -0700374 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700375
376 list_for_each_entry(stab, &qdisc_stab_list, list) {
377 if (memcmp(&stab->szopts, s, sizeof(*s)))
378 continue;
379 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
380 continue;
381 stab->refcnt++;
David S. Millerf3b96052008-08-18 22:33:05 -0700382 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700383 return stab;
384 }
385
David S. Millerf3b96052008-08-18 22:33:05 -0700386 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700387
388 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
389 if (!stab)
390 return ERR_PTR(-ENOMEM);
391
392 stab->refcnt = 1;
393 stab->szopts = *s;
394 if (tsize > 0)
395 memcpy(stab->data, tab, tsize * sizeof(u16));
396
David S. Millerf3b96052008-08-18 22:33:05 -0700397 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700398 list_add_tail(&stab->list, &qdisc_stab_list);
David S. Millerf3b96052008-08-18 22:33:05 -0700399 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700400
401 return stab;
402}
403
404void qdisc_put_stab(struct qdisc_size_table *tab)
405{
406 if (!tab)
407 return;
408
David S. Millerf3b96052008-08-18 22:33:05 -0700409 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700410
411 if (--tab->refcnt == 0) {
412 list_del(&tab->list);
413 kfree(tab);
414 }
415
David S. Millerf3b96052008-08-18 22:33:05 -0700416 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700417}
418EXPORT_SYMBOL(qdisc_put_stab);
419
420static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
421{
422 struct nlattr *nest;
423
424 nest = nla_nest_start(skb, TCA_STAB);
425 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
426 nla_nest_end(skb, nest);
427
428 return skb->len;
429
430nla_put_failure:
431 return -1;
432}
433
434void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
435{
436 int pkt_len, slot;
437
438 pkt_len = skb->len + stab->szopts.overhead;
439 if (unlikely(!stab->szopts.tsize))
440 goto out;
441
442 slot = pkt_len + stab->szopts.cell_align;
443 if (unlikely(slot < 0))
444 slot = 0;
445
446 slot >>= stab->szopts.cell_log;
447 if (likely(slot < stab->szopts.tsize))
448 pkt_len = stab->data[slot];
449 else
450 pkt_len = stab->data[stab->szopts.tsize - 1] *
451 (slot / stab->szopts.tsize) +
452 stab->data[slot % stab->szopts.tsize];
453
454 pkt_len <<= stab->szopts.size_log;
455out:
456 if (unlikely(pkt_len < 1))
457 pkt_len = 1;
458 qdisc_skb_cb(skb)->pkt_len = pkt_len;
459}
460EXPORT_SYMBOL(qdisc_calculate_pkt_len);
461
Patrick McHardy41794772007-03-16 01:19:15 -0700462static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
463{
464 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
465 timer);
466
467 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700468 smp_wmb();
David S. Miller8608db02008-08-18 20:51:18 -0700469 __netif_schedule(qdisc_root(wd->qdisc));
Stephen Hemminger19365022007-03-22 12:18:35 -0700470
Patrick McHardy41794772007-03-16 01:19:15 -0700471 return HRTIMER_NORESTART;
472}
473
474void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
475{
476 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
477 wd->timer.function = qdisc_watchdog;
478 wd->qdisc = qdisc;
479}
480EXPORT_SYMBOL(qdisc_watchdog_init);
481
482void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
483{
484 ktime_t time;
485
Jarek Poplawski2540e052008-08-21 05:11:14 -0700486 if (test_bit(__QDISC_STATE_DEACTIVATED,
487 &qdisc_root_sleeping(wd->qdisc)->state))
488 return;
489
Patrick McHardy41794772007-03-16 01:19:15 -0700490 wd->qdisc->flags |= TCQ_F_THROTTLED;
491 time = ktime_set(0, 0);
492 time = ktime_add_ns(time, PSCHED_US2NS(expires));
493 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
494}
495EXPORT_SYMBOL(qdisc_watchdog_schedule);
496
497void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
498{
499 hrtimer_cancel(&wd->timer);
500 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
501}
502EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
Adrian Bunka94f7792008-07-22 14:20:11 -0700504static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700505{
506 unsigned int size = n * sizeof(struct hlist_head), i;
507 struct hlist_head *h;
508
509 if (size <= PAGE_SIZE)
510 h = kmalloc(size, GFP_KERNEL);
511 else
512 h = (struct hlist_head *)
513 __get_free_pages(GFP_KERNEL, get_order(size));
514
515 if (h != NULL) {
516 for (i = 0; i < n; i++)
517 INIT_HLIST_HEAD(&h[i]);
518 }
519 return h;
520}
521
522static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
523{
524 unsigned int size = n * sizeof(struct hlist_head);
525
526 if (size <= PAGE_SIZE)
527 kfree(h);
528 else
529 free_pages((unsigned long)h, get_order(size));
530}
531
532void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
533{
534 struct Qdisc_class_common *cl;
535 struct hlist_node *n, *next;
536 struct hlist_head *nhash, *ohash;
537 unsigned int nsize, nmask, osize;
538 unsigned int i, h;
539
540 /* Rehash when load factor exceeds 0.75 */
541 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
542 return;
543 nsize = clhash->hashsize * 2;
544 nmask = nsize - 1;
545 nhash = qdisc_class_hash_alloc(nsize);
546 if (nhash == NULL)
547 return;
548
549 ohash = clhash->hash;
550 osize = clhash->hashsize;
551
552 sch_tree_lock(sch);
553 for (i = 0; i < osize; i++) {
554 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
555 h = qdisc_class_hash(cl->classid, nmask);
556 hlist_add_head(&cl->hnode, &nhash[h]);
557 }
558 }
559 clhash->hash = nhash;
560 clhash->hashsize = nsize;
561 clhash->hashmask = nmask;
562 sch_tree_unlock(sch);
563
564 qdisc_class_hash_free(ohash, osize);
565}
566EXPORT_SYMBOL(qdisc_class_hash_grow);
567
568int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
569{
570 unsigned int size = 4;
571
572 clhash->hash = qdisc_class_hash_alloc(size);
573 if (clhash->hash == NULL)
574 return -ENOMEM;
575 clhash->hashsize = size;
576 clhash->hashmask = size - 1;
577 clhash->hashelems = 0;
578 return 0;
579}
580EXPORT_SYMBOL(qdisc_class_hash_init);
581
582void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
583{
584 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
585}
586EXPORT_SYMBOL(qdisc_class_hash_destroy);
587
588void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
589 struct Qdisc_class_common *cl)
590{
591 unsigned int h;
592
593 INIT_HLIST_NODE(&cl->hnode);
594 h = qdisc_class_hash(cl->classid, clhash->hashmask);
595 hlist_add_head(&cl->hnode, &clhash->hash[h]);
596 clhash->hashelems++;
597}
598EXPORT_SYMBOL(qdisc_class_hash_insert);
599
600void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
601 struct Qdisc_class_common *cl)
602{
603 hlist_del(&cl->hnode);
604 clhash->hashelems--;
605}
606EXPORT_SYMBOL(qdisc_class_hash_remove);
607
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608/* Allocate an unique handle from space managed by kernel */
609
610static u32 qdisc_alloc_handle(struct net_device *dev)
611{
612 int i = 0x10000;
613 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
614
615 do {
616 autohandle += TC_H_MAKE(0x10000U, 0);
617 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
618 autohandle = TC_H_MAKE(0x80000000U, 0);
619 } while (qdisc_lookup(dev, autohandle) && --i > 0);
620
621 return i>0 ? autohandle : 0;
622}
623
David S. Miller99194cf2008-07-17 04:54:10 -0700624/* Attach toplevel qdisc to device queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625
David S. Miller99194cf2008-07-17 04:54:10 -0700626static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
627 struct Qdisc *qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
David S. Miller8d50b532008-07-30 02:37:46 -0700629 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
David S. Miller53049972008-07-16 03:00:19 -0700630 spinlock_t *root_lock;
David S. Miller53049972008-07-16 03:00:19 -0700631
Jarek Poplawski666d9bb2008-08-27 02:12:52 -0700632 root_lock = qdisc_lock(oqdisc);
David S. Miller53049972008-07-16 03:00:19 -0700633 spin_lock_bh(root_lock);
634
David S. Miller8d50b532008-07-30 02:37:46 -0700635 /* Prune old scheduler */
636 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
637 qdisc_reset(oqdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
David S. Miller8d50b532008-07-30 02:37:46 -0700639 /* ... and graft new one */
640 if (qdisc == NULL)
641 qdisc = &noop_qdisc;
642 dev_queue->qdisc_sleeping = qdisc;
Jarek Poplawskif7a54c12008-08-27 02:22:07 -0700643 rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
David S. Miller53049972008-07-16 03:00:19 -0700645 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 return oqdisc;
648}
649
Patrick McHardy43effa12006-11-29 17:35:48 -0800650void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
651{
Eric Dumazet20fea082007-11-14 01:44:41 -0800652 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800653 unsigned long cl;
654 u32 parentid;
655
656 if (n == 0)
657 return;
658 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700659 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
660 return;
661
David S. Miller5ce2d482008-07-08 17:06:30 -0700662 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700663 if (sch == NULL) {
664 WARN_ON(parentid != TC_H_ROOT);
665 return;
666 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800667 cops = sch->ops->cl_ops;
668 if (cops->qlen_notify) {
669 cl = cops->get(sch, parentid);
670 cops->qlen_notify(sch, cl);
671 cops->put(sch, cl);
672 }
673 sch->q.qlen -= n;
674 }
675}
676EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
David S. Miller99194cf2008-07-17 04:54:10 -0700678static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
679 struct Qdisc *old, struct Qdisc *new)
680{
681 if (new || old)
682 qdisc_notify(skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
David S. Miller4d8863a2008-08-18 21:03:15 -0700684 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700685 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700686}
687
688/* Graft qdisc "new" to class "classid" of qdisc "parent" or
689 * to device "dev".
690 *
691 * When appropriate send a netlink notification using 'skb'
692 * and "n".
693 *
694 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 */
696
697static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
David S. Miller99194cf2008-07-17 04:54:10 -0700698 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
699 struct Qdisc *new, struct Qdisc *old)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700{
David S. Miller99194cf2008-07-17 04:54:10 -0700701 struct Qdisc *q = old;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900704 if (parent == NULL) {
David S. Miller99194cf2008-07-17 04:54:10 -0700705 unsigned int i, num_q, ingress;
706
707 ingress = 0;
708 num_q = dev->num_tx_queues;
David S. Miller8d50b532008-07-30 02:37:46 -0700709 if ((q && q->flags & TCQ_F_INGRESS) ||
710 (new && new->flags & TCQ_F_INGRESS)) {
David S. Miller99194cf2008-07-17 04:54:10 -0700711 num_q = 1;
712 ingress = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 }
David S. Miller99194cf2008-07-17 04:54:10 -0700714
715 if (dev->flags & IFF_UP)
716 dev_deactivate(dev);
717
718 for (i = 0; i < num_q; i++) {
719 struct netdev_queue *dev_queue = &dev->rx_queue;
720
721 if (!ingress)
722 dev_queue = netdev_get_tx_queue(dev, i);
723
David S. Miller8d50b532008-07-30 02:37:46 -0700724 old = dev_graft_qdisc(dev_queue, new);
725 if (new && i > 0)
726 atomic_inc(&new->refcnt);
727
David S. Miller99194cf2008-07-17 04:54:10 -0700728 notify_and_destroy(skb, n, classid, old, new);
729 }
730
731 if (dev->flags & IFF_UP)
732 dev_activate(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800734 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736 err = -EINVAL;
737
738 if (cops) {
739 unsigned long cl = cops->get(parent, classid);
740 if (cl) {
David S. Miller99194cf2008-07-17 04:54:10 -0700741 err = cops->graft(parent, cl, new, &old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 cops->put(parent, cl);
743 }
744 }
David S. Miller99194cf2008-07-17 04:54:10 -0700745 if (!err)
746 notify_and_destroy(skb, n, classid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 }
748 return err;
749}
750
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700751/* lockdep annotation is needed for ingress; egress gets it only for name */
752static struct lock_class_key qdisc_tx_lock;
753static struct lock_class_key qdisc_rx_lock;
754
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755/*
756 Allocate and initialize new qdisc.
757
758 Parameters are passed via opt.
759 */
760
761static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700762qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
763 u32 parent, u32 handle, struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
765 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800766 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 struct Qdisc *sch;
768 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700769 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
771 ops = qdisc_lookup_ops(kind);
Johannes Berg95a5afc2008-10-16 15:24:51 -0700772#ifdef CONFIG_MODULES
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 if (ops == NULL && kind != NULL) {
774 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800775 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 /* We dropped the RTNL semaphore in order to
777 * perform the module load. So, even if we
778 * succeeded in loading the module we have to
779 * tell the caller to replay the request. We
780 * indicate this using -EAGAIN.
781 * We replay the request because the device may
782 * go away in the mean time.
783 */
784 rtnl_unlock();
785 request_module("sch_%s", name);
786 rtnl_lock();
787 ops = qdisc_lookup_ops(kind);
788 if (ops != NULL) {
789 /* We will try again qdisc_lookup_ops,
790 * so don't keep a reference.
791 */
792 module_put(ops->owner);
793 err = -EAGAIN;
794 goto err_out;
795 }
796 }
797 }
798#endif
799
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700800 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 if (ops == NULL)
802 goto err_out;
803
David S. Miller5ce2d482008-07-08 17:06:30 -0700804 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700805 if (IS_ERR(sch)) {
806 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700808 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700810 sch->parent = parent;
811
Thomas Graf3d54b822005-07-05 14:15:09 -0700812 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700814 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700815 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700816 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700817 if (handle == 0) {
818 handle = qdisc_alloc_handle(dev);
819 err = -ENOMEM;
820 if (handle == 0)
821 goto err_out3;
822 }
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700823 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 }
825
Thomas Graf3d54b822005-07-05 14:15:09 -0700826 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827
Patrick McHardy1e904742008-01-22 22:11:17 -0800828 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700829 if (tca[TCA_STAB]) {
830 stab = qdisc_get_stab(tca[TCA_STAB]);
831 if (IS_ERR(stab)) {
832 err = PTR_ERR(stab);
833 goto err_out3;
834 }
835 sch->stab = stab;
836 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800837 if (tca[TCA_RATE]) {
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700838 spinlock_t *root_lock;
839
840 if ((sch->parent != TC_H_ROOT) &&
841 !(sch->flags & TCQ_F_INGRESS))
842 root_lock = qdisc_root_sleeping_lock(sch);
843 else
844 root_lock = qdisc_lock(sch);
845
Thomas Graf023e09a2005-07-05 14:15:53 -0700846 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700847 root_lock, tca[TCA_RATE]);
Thomas Graf023e09a2005-07-05 14:15:53 -0700848 if (err) {
849 /*
850 * Any broken qdiscs that would require
851 * a ops->reset() here? The qdisc was never
852 * in action so it shouldn't be necessary.
853 */
854 if (ops->destroy)
855 ops->destroy(sch);
856 goto err_out3;
857 }
858 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700859
860 qdisc_list_add(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 return sch;
863 }
864err_out3:
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700865 qdisc_put_stab(sch->stab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700867 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868err_out2:
869 module_put(ops->owner);
870err_out:
871 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 return NULL;
873}
874
Patrick McHardy1e904742008-01-22 22:11:17 -0800875static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876{
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700877 struct qdisc_size_table *stab = NULL;
878 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700880 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 if (sch->ops->change == NULL)
882 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -0800883 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 if (err)
885 return err;
886 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700887
888 if (tca[TCA_STAB]) {
889 stab = qdisc_get_stab(tca[TCA_STAB]);
890 if (IS_ERR(stab))
891 return PTR_ERR(stab);
892 }
893
894 qdisc_put_stab(sch->stab);
895 sch->stab = stab;
896
Patrick McHardy1e904742008-01-22 22:11:17 -0800897 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 gen_replace_estimator(&sch->bstats, &sch->rate_est,
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700899 qdisc_root_sleeping_lock(sch),
900 tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 return 0;
902}
903
904struct check_loop_arg
905{
906 struct qdisc_walker w;
907 struct Qdisc *p;
908 int depth;
909};
910
911static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
912
913static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
914{
915 struct check_loop_arg arg;
916
917 if (q->ops->cl_ops == NULL)
918 return 0;
919
920 arg.w.stop = arg.w.skip = arg.w.count = 0;
921 arg.w.fn = check_loop_fn;
922 arg.depth = depth;
923 arg.p = p;
924 q->ops->cl_ops->walk(q, &arg.w);
925 return arg.w.stop ? -ELOOP : 0;
926}
927
928static int
929check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
930{
931 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800932 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 struct check_loop_arg *arg = (struct check_loop_arg *)w;
934
935 leaf = cops->leaf(q, cl);
936 if (leaf) {
937 if (leaf == arg->p || arg->depth > 7)
938 return -ELOOP;
939 return check_loop(leaf, arg->p, arg->depth + 1);
940 }
941 return 0;
942}
943
944/*
945 * Delete/get qdisc.
946 */
947
948static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
949{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900950 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -0800952 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 struct net_device *dev;
954 u32 clid = tcm->tcm_parent;
955 struct Qdisc *q = NULL;
956 struct Qdisc *p = NULL;
957 int err;
958
Denis V. Lunevb8542722007-12-01 00:21:31 +1100959 if (net != &init_net)
960 return -EINVAL;
961
Eric W. Biederman881d9662007-09-17 11:56:21 -0700962 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 return -ENODEV;
964
Patrick McHardy1e904742008-01-22 22:11:17 -0800965 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
966 if (err < 0)
967 return err;
968
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 if (clid) {
970 if (clid != TC_H_ROOT) {
971 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
972 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
973 return -ENOENT;
974 q = qdisc_leaf(p, clid);
975 } else { /* ingress */
David S. Miller8123b422008-08-08 23:23:39 -0700976 q = dev->rx_queue.qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900977 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 } else {
David S. Millere8a04642008-07-17 00:34:19 -0700979 struct netdev_queue *dev_queue;
980 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -0700981 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 }
983 if (!q)
984 return -ENOENT;
985
986 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
987 return -EINVAL;
988 } else {
989 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
990 return -ENOENT;
991 }
992
Patrick McHardy1e904742008-01-22 22:11:17 -0800993 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return -EINVAL;
995
996 if (n->nlmsg_type == RTM_DELQDISC) {
997 if (!clid)
998 return -EINVAL;
999 if (q->handle == 0)
1000 return -ENOENT;
David S. Miller99194cf2008-07-17 04:54:10 -07001001 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 } else {
1004 qdisc_notify(skb, n, clid, NULL, q);
1005 }
1006 return 0;
1007}
1008
1009/*
1010 Create/change qdisc.
1011 */
1012
1013static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1014{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001015 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001017 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 struct net_device *dev;
1019 u32 clid;
1020 struct Qdisc *q, *p;
1021 int err;
1022
Denis V. Lunevb8542722007-12-01 00:21:31 +11001023 if (net != &init_net)
1024 return -EINVAL;
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026replay:
1027 /* Reinit, just in case something touches this. */
1028 tcm = NLMSG_DATA(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 clid = tcm->tcm_parent;
1030 q = p = NULL;
1031
Eric W. Biederman881d9662007-09-17 11:56:21 -07001032 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 return -ENODEV;
1034
Patrick McHardy1e904742008-01-22 22:11:17 -08001035 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1036 if (err < 0)
1037 return err;
1038
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 if (clid) {
1040 if (clid != TC_H_ROOT) {
1041 if (clid != TC_H_INGRESS) {
1042 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1043 return -ENOENT;
1044 q = qdisc_leaf(p, clid);
1045 } else { /*ingress */
David S. Miller8123b422008-08-08 23:23:39 -07001046 q = dev->rx_queue.qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 }
1048 } else {
David S. Millere8a04642008-07-17 00:34:19 -07001049 struct netdev_queue *dev_queue;
1050 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Millerb0e1e642008-07-08 17:42:10 -07001051 q = dev_queue->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 }
1053
1054 /* It may be default qdisc, ignore it */
1055 if (q && q->handle == 0)
1056 q = NULL;
1057
1058 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1059 if (tcm->tcm_handle) {
1060 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1061 return -EEXIST;
1062 if (TC_H_MIN(tcm->tcm_handle))
1063 return -EINVAL;
1064 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1065 goto create_n_graft;
1066 if (n->nlmsg_flags&NLM_F_EXCL)
1067 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001068 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 return -EINVAL;
1070 if (q == p ||
1071 (p && check_loop(q, p, 0)))
1072 return -ELOOP;
1073 atomic_inc(&q->refcnt);
1074 goto graft;
1075 } else {
1076 if (q == NULL)
1077 goto create_n_graft;
1078
1079 /* This magic test requires explanation.
1080 *
1081 * We know, that some child q is already
1082 * attached to this parent and have choice:
1083 * either to change it or to create/graft new one.
1084 *
1085 * 1. We are allowed to create/graft only
1086 * if CREATE and REPLACE flags are set.
1087 *
1088 * 2. If EXCL is set, requestor wanted to say,
1089 * that qdisc tcm_handle is not expected
1090 * to exist, so that we choose create/graft too.
1091 *
1092 * 3. The last case is when no flags are set.
1093 * Alas, it is sort of hole in API, we
1094 * cannot decide what to do unambiguously.
1095 * For now we select create/graft, if
1096 * user gave KIND, which does not match existing.
1097 */
1098 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1099 (n->nlmsg_flags&NLM_F_REPLACE) &&
1100 ((n->nlmsg_flags&NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001101 (tca[TCA_KIND] &&
1102 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 goto create_n_graft;
1104 }
1105 }
1106 } else {
1107 if (!tcm->tcm_handle)
1108 return -EINVAL;
1109 q = qdisc_lookup(dev, tcm->tcm_handle);
1110 }
1111
1112 /* Change qdisc parameters */
1113 if (q == NULL)
1114 return -ENOENT;
1115 if (n->nlmsg_flags&NLM_F_EXCL)
1116 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001117 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 return -EINVAL;
1119 err = qdisc_change(q, tca);
1120 if (err == 0)
1121 qdisc_notify(skb, n, clid, NULL, q);
1122 return err;
1123
1124create_n_graft:
1125 if (!(n->nlmsg_flags&NLM_F_CREATE))
1126 return -ENOENT;
1127 if (clid == TC_H_INGRESS)
David S. Millerbb949fb2008-07-08 16:55:56 -07001128 q = qdisc_create(dev, &dev->rx_queue,
1129 tcm->tcm_parent, tcm->tcm_parent,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001130 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001131 else
David S. Millere8a04642008-07-17 00:34:19 -07001132 q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
David S. Millerbb949fb2008-07-08 16:55:56 -07001133 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001134 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 if (q == NULL) {
1136 if (err == -EAGAIN)
1137 goto replay;
1138 return err;
1139 }
1140
1141graft:
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001142 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1143 if (err) {
1144 if (q)
1145 qdisc_destroy(q);
1146 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001148
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 return 0;
1150}
1151
1152static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001153 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154{
1155 struct tcmsg *tcm;
1156 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001157 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 struct gnet_dump d;
1159
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001160 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 tcm = NLMSG_DATA(nlh);
1162 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001163 tcm->tcm__pad1 = 0;
1164 tcm->tcm__pad2 = 0;
David S. Miller5ce2d482008-07-08 17:06:30 -07001165 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 tcm->tcm_parent = clid;
1167 tcm->tcm_handle = q->handle;
1168 tcm->tcm_info = atomic_read(&q->refcnt);
Patrick McHardy57e1c482008-01-23 20:34:28 -08001169 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 if (q->ops->dump && q->ops->dump(q, skb) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001171 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 q->qstats.qlen = q->q.qlen;
1173
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001174 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1175 goto nla_put_failure;
1176
Jarek Poplawski102396a2008-08-29 14:21:52 -07001177 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1178 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001179 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001182 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
1184 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 gnet_stats_copy_queue(&d, &q->qstats) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001187 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001188
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001190 goto nla_put_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001191
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001192 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 return skb->len;
1194
1195nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001196nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001197 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 return -1;
1199}
1200
1201static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1202 u32 clid, struct Qdisc *old, struct Qdisc *new)
1203{
1204 struct sk_buff *skb;
1205 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1206
1207 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1208 if (!skb)
1209 return -ENOBUFS;
1210
1211 if (old && old->handle) {
1212 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1213 goto err_out;
1214 }
1215 if (new) {
1216 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1217 goto err_out;
1218 }
1219
1220 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001221 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222
1223err_out:
1224 kfree_skb(skb);
1225 return -EINVAL;
1226}
1227
David S. Miller30723672008-07-18 22:50:15 -07001228static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1229{
1230 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1231}
1232
1233static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1234 struct netlink_callback *cb,
1235 int *q_idx_p, int s_q_idx)
1236{
1237 int ret = 0, q_idx = *q_idx_p;
1238 struct Qdisc *q;
1239
1240 if (!root)
1241 return 0;
1242
1243 q = root;
1244 if (q_idx < s_q_idx) {
1245 q_idx++;
1246 } else {
1247 if (!tc_qdisc_dump_ignore(q) &&
1248 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1249 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1250 goto done;
1251 q_idx++;
1252 }
1253 list_for_each_entry(q, &root->list, list) {
1254 if (q_idx < s_q_idx) {
1255 q_idx++;
1256 continue;
1257 }
1258 if (!tc_qdisc_dump_ignore(q) &&
1259 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1260 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1261 goto done;
1262 q_idx++;
1263 }
1264
1265out:
1266 *q_idx_p = q_idx;
1267 return ret;
1268done:
1269 ret = -1;
1270 goto out;
1271}
1272
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1274{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001275 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276 int idx, q_idx;
1277 int s_idx, s_q_idx;
1278 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
Denis V. Lunevb8542722007-12-01 00:21:31 +11001280 if (net != &init_net)
1281 return 0;
1282
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283 s_idx = cb->args[0];
1284 s_q_idx = q_idx = cb->args[1];
1285 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07001286 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001287 for_each_netdev(&init_net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001288 struct netdev_queue *dev_queue;
1289
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001291 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 if (idx > s_idx)
1293 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001295
1296 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller827ebd62008-08-07 20:26:40 -07001297 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001298 goto done;
1299
1300 dev_queue = &dev->rx_queue;
David S. Miller827ebd62008-08-07 20:26:40 -07001301 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001302 goto done;
1303
Pavel Emelianov7562f872007-05-03 15:13:45 -07001304cont:
1305 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 }
1307
1308done:
1309 read_unlock(&dev_base_lock);
1310
1311 cb->args[0] = idx;
1312 cb->args[1] = q_idx;
1313
1314 return skb->len;
1315}
1316
1317
1318
1319/************************************************
1320 * Traffic classes manipulation. *
1321 ************************************************/
1322
1323
1324
1325static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1326{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001327 struct net *net = sock_net(skb->sk);
David S. Millerb0e1e642008-07-08 17:42:10 -07001328 struct netdev_queue *dev_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 struct tcmsg *tcm = NLMSG_DATA(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001330 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 struct net_device *dev;
1332 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001333 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 unsigned long cl = 0;
1335 unsigned long new_cl;
1336 u32 pid = tcm->tcm_parent;
1337 u32 clid = tcm->tcm_handle;
1338 u32 qid = TC_H_MAJ(clid);
1339 int err;
1340
Denis V. Lunevb8542722007-12-01 00:21:31 +11001341 if (net != &init_net)
1342 return -EINVAL;
1343
Eric W. Biederman881d9662007-09-17 11:56:21 -07001344 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 return -ENODEV;
1346
Patrick McHardy1e904742008-01-22 22:11:17 -08001347 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1348 if (err < 0)
1349 return err;
1350
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 /*
1352 parent == TC_H_UNSPEC - unspecified parent.
1353 parent == TC_H_ROOT - class is root, which has no parent.
1354 parent == X:0 - parent is root class.
1355 parent == X:Y - parent is a node in hierarchy.
1356 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1357
1358 handle == 0:0 - generate handle from kernel pool.
1359 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1360 handle == X:Y - clear.
1361 handle == X:0 - root class.
1362 */
1363
1364 /* Step 1. Determine qdisc handle X:0 */
1365
David S. Millere8a04642008-07-17 00:34:19 -07001366 dev_queue = netdev_get_tx_queue(dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 if (pid != TC_H_ROOT) {
1368 u32 qid1 = TC_H_MAJ(pid);
1369
1370 if (qid && qid1) {
1371 /* If both majors are known, they must be identical. */
1372 if (qid != qid1)
1373 return -EINVAL;
1374 } else if (qid1) {
1375 qid = qid1;
1376 } else if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001377 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378
1379 /* Now qid is genuine qdisc handle consistent
1380 both with parent and child.
1381
1382 TC_H_MAJ(pid) still may be unspecified, complete it now.
1383 */
1384 if (pid)
1385 pid = TC_H_MAKE(qid, pid);
1386 } else {
1387 if (qid == 0)
David S. Millerb0e1e642008-07-08 17:42:10 -07001388 qid = dev_queue->qdisc_sleeping->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 }
1390
1391 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001392 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 return -ENOENT;
1394
1395 /* An check that it supports classes */
1396 cops = q->ops->cl_ops;
1397 if (cops == NULL)
1398 return -EINVAL;
1399
1400 /* Now try to get class */
1401 if (clid == 0) {
1402 if (pid == TC_H_ROOT)
1403 clid = qid;
1404 } else
1405 clid = TC_H_MAKE(qid, clid);
1406
1407 if (clid)
1408 cl = cops->get(q, clid);
1409
1410 if (cl == 0) {
1411 err = -ENOENT;
1412 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1413 goto out;
1414 } else {
1415 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001416 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 err = -EEXIST;
1418 if (n->nlmsg_flags&NLM_F_EXCL)
1419 goto out;
1420 break;
1421 case RTM_DELTCLASS:
1422 err = cops->delete(q, cl);
1423 if (err == 0)
1424 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1425 goto out;
1426 case RTM_GETTCLASS:
1427 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1428 goto out;
1429 default:
1430 err = -EINVAL;
1431 goto out;
1432 }
1433 }
1434
1435 new_cl = cl;
1436 err = cops->change(q, clid, pid, tca, &new_cl);
1437 if (err == 0)
1438 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1439
1440out:
1441 if (cl)
1442 cops->put(q, cl);
1443
1444 return err;
1445}
1446
1447
1448static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1449 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001450 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451{
1452 struct tcmsg *tcm;
1453 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001454 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001456 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001458 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 tcm = NLMSG_DATA(nlh);
1460 tcm->tcm_family = AF_UNSPEC;
David S. Miller5ce2d482008-07-08 17:06:30 -07001461 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 tcm->tcm_parent = q->handle;
1463 tcm->tcm_handle = q->handle;
1464 tcm->tcm_info = 0;
Patrick McHardy57e1c482008-01-23 20:34:28 -08001465 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001467 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
Jarek Poplawski102396a2008-08-29 14:21:52 -07001469 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1470 qdisc_root_sleeping_lock(q), &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001471 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
1473 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001474 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475
1476 if (gnet_stats_finish_copy(&d) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001477 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001479 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 return skb->len;
1481
1482nlmsg_failure:
Patrick McHardy1e904742008-01-22 22:11:17 -08001483nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001484 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 return -1;
1486}
1487
1488static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1489 struct Qdisc *q, unsigned long cl, int event)
1490{
1491 struct sk_buff *skb;
1492 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1493
1494 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1495 if (!skb)
1496 return -ENOBUFS;
1497
1498 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1499 kfree_skb(skb);
1500 return -EINVAL;
1501 }
1502
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001503 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504}
1505
1506struct qdisc_dump_args
1507{
1508 struct qdisc_walker w;
1509 struct sk_buff *skb;
1510 struct netlink_callback *cb;
1511};
1512
1513static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1514{
1515 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1516
1517 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1518 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1519}
1520
David S. Miller30723672008-07-18 22:50:15 -07001521static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1522 struct tcmsg *tcm, struct netlink_callback *cb,
1523 int *t_p, int s_t)
1524{
1525 struct qdisc_dump_args arg;
1526
1527 if (tc_qdisc_dump_ignore(q) ||
1528 *t_p < s_t || !q->ops->cl_ops ||
1529 (tcm->tcm_parent &&
1530 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1531 (*t_p)++;
1532 return 0;
1533 }
1534 if (*t_p > s_t)
1535 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1536 arg.w.fn = qdisc_class_dump;
1537 arg.skb = skb;
1538 arg.cb = cb;
1539 arg.w.stop = 0;
1540 arg.w.skip = cb->args[1];
1541 arg.w.count = 0;
1542 q->ops->cl_ops->walk(q, &arg.w);
1543 cb->args[1] = arg.w.count;
1544 if (arg.w.stop)
1545 return -1;
1546 (*t_p)++;
1547 return 0;
1548}
1549
1550static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1551 struct tcmsg *tcm, struct netlink_callback *cb,
1552 int *t_p, int s_t)
1553{
1554 struct Qdisc *q;
1555
1556 if (!root)
1557 return 0;
1558
1559 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1560 return -1;
1561
1562 list_for_each_entry(q, &root->list, list) {
1563 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1564 return -1;
1565 }
1566
1567 return 0;
1568}
1569
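/*
 * RTM_GETTCLASS dump handler: walks both the egress hierarchy (the sleeping
 * qdisc of tx queue 0) and whatever hangs off dev->rx_queue (ingress),
 * remembering in cb->args[0] how many qdiscs have already been dumped so the
 * dump can be continued across netlink callbacks.
 */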
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1571{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001573 struct net *net = sock_net(skb->sk);
1574 struct netdev_queue *dev_queue;
1575 struct net_device *dev;
1576 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577
Denis V. Lunevb8542722007-12-01 00:21:31 +11001578 if (net != &init_net)
1579 return 0;
1580
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1582 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001583 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584 return 0;
1585
1586 s_t = cb->args[0];
1587 t = 0;
1588
David S. Miller30723672008-07-18 22:50:15 -07001589 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller8123b422008-08-08 23:23:39 -07001590 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001591 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592
David S. Miller30723672008-07-18 22:50:15 -07001593 dev_queue = &dev->rx_queue;
David S. Miller8123b422008-08-08 23:23:39 -07001594 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001595 goto done;
1596
1597done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 cb->args[0] = t;
1599
1600 dev_put(dev);
1601 return skb->len;
1602}
1603
/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for the protocol and asks
 * the specific classifiers.
 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001608int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1609 struct tcf_result *res)
1610{
1611 __be16 protocol = skb->protocol;
1612 int err = 0;
1613
1614 for (; tp; tp = tp->next) {
1615 if ((tp->protocol == protocol ||
1616 tp->protocol == htons(ETH_P_ALL)) &&
1617 (err = tp->classify(skb, tp, res)) >= 0) {
1618#ifdef CONFIG_NET_CLS_ACT
1619 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1620 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1621#endif
1622 return err;
1623 }
1624 }
1625 return -1;
1626}
1627EXPORT_SYMBOL(tc_classify_compat);
1628
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001630 struct tcf_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631{
1632 int err = 0;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001633 __be16 protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634#ifdef CONFIG_NET_CLS_ACT
1635 struct tcf_proto *otp = tp;
1636reclassify:
1637#endif
1638 protocol = skb->protocol;
1639
Patrick McHardy73ca4912007-07-15 00:02:31 -07001640 err = tc_classify_compat(skb, tp, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy73ca4912007-07-15 00:02:31 -07001642 if (err == TC_ACT_RECLASSIFY) {
1643 u32 verd = G_TC_VERD(skb->tc_verd);
1644 tp = otp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
Patrick McHardy73ca4912007-07-15 00:02:31 -07001646 if (verd++ >= MAX_REC_LOOP) {
			printk(KERN_NOTICE
			       "rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio & 0xffff, ntohs(tp->protocol));
1650 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001652 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
1653 goto reclassify;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001655#endif
1656 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657}
Patrick McHardy73ca4912007-07-15 00:02:31 -07001658EXPORT_SYMBOL(tc_classify);
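/*
 * A minimal sketch (illustrative only; "q->filter_list" and the fallback are
 * stand-ins for a qdisc's own data) of how a classful qdisc's ->enqueue()
 * typically drives tc_classify():
 *
 *	struct tcf_result res;
 *	int err = tc_classify(skb, q->filter_list, &res);
 *
 *	if (err < 0) {
 *		// no filter matched: fall back to the qdisc's default class
 *	} else if (err == TC_ACT_SHOT || err == TC_ACT_STOLEN ||
 *		   err == TC_ACT_QUEUED) {
 *		// (CONFIG_NET_CLS_ACT only) an action consumed the packet
 *	} else {
 *		// res.classid selects the class/band to enqueue into
 *	}
 *
 * Real qdiscs additionally translate the drop/steal cases into the proper
 * NET_XMIT_* return values.
 */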
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659
Patrick McHardya48b5a62007-03-23 11:29:43 -07001660void tcf_destroy(struct tcf_proto *tp)
1661{
1662 tp->ops->destroy(tp);
1663 module_put(tp->ops->owner);
1664 kfree(tp);
1665}
1666
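/*
 * tcf_destroy_chain - unlink and free a whole filter chain
 *
 * Classful qdiscs call this from their ->destroy() on each per-class filter
 * list head (typically something like tcf_destroy_chain(&cl->filter_list)).
 * Every tcf_proto on the chain is destroyed and the module reference taken
 * when the filter was attached is dropped.
 */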
Patrick McHardyff31ab52008-07-01 19:52:38 -07001667void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001668{
1669 struct tcf_proto *tp;
1670
Patrick McHardyff31ab52008-07-01 19:52:38 -07001671 while ((tp = *fl) != NULL) {
1672 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001673 tcf_destroy(tp);
1674 }
1675}
1676EXPORT_SYMBOL(tcf_destroy_chain);
1677
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678#ifdef CONFIG_PROC_FS
1679static int psched_show(struct seq_file *seq, void *v)
1680{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001681 struct timespec ts;
1682
1683 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 seq_printf(seq, "%08x %08x %08x %08x\n",
Patrick McHardy641b9e02007-03-16 01:18:42 -07001685 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001686 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001687 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688
1689 return 0;
1690}
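/*
 * /proc/net/psched exports four hex words which userspace (iproute2's tc)
 * reads at startup to convert between psched ticks and real time: the
 * nanoseconds in a microsecond, the nanoseconds in one psched tick, a fixed
 * 1000000 advertising the microsecond resolution of the psched clock, and the
 * hrtimer resolution expressed in Hz.  For example (values are illustrative
 * and depend on PSCHED_SHIFT and the clocksource):
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000400 000f4240 3b9aca00
 */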
1691
1692static int psched_open(struct inode *inode, struct file *file)
1693{
1694 return single_open(file, psched_show, PDE(inode)->data);
1695}
1696
Arjan van de Venda7071d2007-02-12 00:55:36 -08001697static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 .owner = THIS_MODULE,
1699 .open = psched_open,
1700 .read = seq_read,
1701 .llseek = seq_lseek,
1702 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001703};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704#endif
1705
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706static int __init pktsched_init(void)
1707{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 register_qdisc(&pfifo_qdisc_ops);
1709 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001710 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711
Thomas Grafbe577dd2007-03-22 11:55:50 -07001712 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1713 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1714 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1715 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1716 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1717 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1718
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 return 0;
1720}
1721
1722subsys_initcall(pktsched_init);