/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   that is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs fall into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes"
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligence lives inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing a packet from the queue.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers and counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

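/*
 * register_qdisc() links a Qdisc_ops onto the global qdisc_base list under
 * qdisc_mod_lock, rejecting a duplicate id with -EEXIST.  Missing callbacks
 * are filled with the noop defaults, with one exception: an ops that supplies
 * its own dequeue but no peek is rejected with -EINVAL, since a usable peek
 * cannot be synthesized for it here.
 */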
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL) {
			qops->peek = noop_qdisc_ops.peek;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* We know handle. Find qdisc among all qdisc's attached to device
   (root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	list_for_each_entry(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

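/*
 * Non-root, non-ingress qdiscs are additionally kept on the list of the
 * root (sleeping) qdisc of their device queue, so that qdisc_lookup() and
 * the dump code can reach every qdisc in the hierarchy from the root.
 */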
static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
}

void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_del(&q->list);
}
EXPORT_SYMBOL(qdisc_list_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;
	struct Qdisc *q;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			goto out;
	}

	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
out:
	return q;
}

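/*
 * qdisc_leaf() maps a class id to the leaf qdisc currently attached to that
 * class, using the parent's class ops (get/leaf/put).  It returns NULL when
 * the parent is classless or the class does not exist.
 */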
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

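/*
 * Rate tables are shared: qdisc_get_rtab() reuses and refcounts an existing
 * table whose tc_ratespec matches, and otherwise copies the table data from
 * the supplied netlink attribute and links it into qdisc_rtab_list.
 * qdisc_put_rtab() drops the reference and frees the table on the last put.
 */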
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

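/*
 * Size tables (TCA_STAB) let a qdisc translate a packet's real length into
 * the length used for scheduling, e.g. to account for per-packet link-layer
 * overhead.  Like rate tables they are shared and refcounted, protected by
 * qdisc_stab_lock.
 */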
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (!s || tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree(tab);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

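/*
 * qdisc_calculate_pkt_len() applies a size table to an skb: the length plus
 * the configured overhead is aligned, shifted down by cell_log to index the
 * table, and the looked-up value (scaled by size_log) becomes the pkt_len
 * stored in the skb's qdisc cb.  Indices beyond tsize are extrapolated from
 * the last slot.
 */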
void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);

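/*
 * The qdisc watchdog is an hrtimer helper for qdiscs that throttle
 * themselves (e.g. rate limiters): qdisc_watchdog_schedule() sets
 * TCQ_F_THROTTLED and arms the timer, and when it fires the flag is cleared
 * and the root qdisc is rescheduled so dequeueing can resume.
 */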
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);

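/*
 * Helpers for the per-qdisc class hash tables used by classful qdiscs:
 * buckets are allocated with kmalloc or whole pages depending on size, and
 * the table is doubled under the qdisc tree lock once the load factor
 * exceeds 0.75.
 */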
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from space managed by kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}

/* Attach toplevel qdisc to device queue. */

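/*
 * The new qdisc becomes the queue's qdisc_sleeping under the root lock; the
 * active qdisc pointer is reset to noop_qdisc until the device is activated
 * again.  The previous qdisc is returned to the caller (reset here if nothing
 * else holds a reference) so it can be notified about and destroyed.
 */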
static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}

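/*
 * qdisc_tree_decrease_qlen() propagates a drop of n packets up the chain of
 * ancestors, letting each classful parent react via qlen_notify (typically
 * to deactivate a now-empty class) and keeping every ancestor's q.qlen
 * consistent.
 */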
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);

static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

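/*
 * qdisc_create() does the common part of RTM_NEWQDISC: it looks up the ops
 * by TCA_KIND (autoloading "sch_<kind>" and returning -EAGAIN so the caller
 * replays the request), allocates the qdisc, assigns or allocates a handle,
 * runs ops->init with TCA_OPTIONS, and optionally attaches a size table and
 * a rate estimator before adding the qdisc to the device's qdisc list.
 */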
static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here?  The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}

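/*
 * qdisc_change() handles an in-place RTM_NEWQDISC on an existing qdisc:
 * ops->change is applied first, then the size table is replaced and, if
 * TCA_RATE is present, the rate estimator is replaced (errors from the
 * estimator replacement are ignored because the change cannot be undone).
 */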
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);

	return 0;
}

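/*
 * check_loop() walks the classes of qdisc q and recurses into their leaf
 * qdiscs to make sure the qdisc p about to be grafted is not one of q's own
 * descendants (bounded at depth 7); otherwise grafting would create a loop
 * and -ELOOP is returned.
 */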
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

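/*
 * tc_modify_qdisc() first locates the parent and any qdisc already grafted
 * at the requested position, then decides between changing that qdisc in
 * place and creating/grafting a new one based on the NLM_F_CREATE,
 * NLM_F_REPLACE and NLM_F_EXCL flags and on whether TCA_KIND still matches
 * (see the "magic test" comment below).
 */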
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be the default qdisc; ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a choice:
				 * either to change it or to create/graft a new one.
				 *
				 * 1. We are allowed to create/graft only
				 *    if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor wanted to say
				 *    that the qdisc tcm_handle is not expected
				 *    to exist, so we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 *    Alas, it is sort of a hole in the API; we
				 *    cannot decide what to do unambiguously.
				 *    For now we select create/graft if the
				 *    user gave a KIND which does not match the existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/


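/*
 * tc_ctl_tclass() handles RTM_{NEW,DEL,GET}TCLASS: it resolves the owning
 * qdisc from tcm_parent/tcm_handle (see the table below), looks the class up
 * via the qdisc's class ops, and then deletes, dumps or changes it, sending
 * an rtnetlink notification on success.
 */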
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

David S. Miller30723672008-07-18 22:50:15 -07001509static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1510 struct tcmsg *tcm, struct netlink_callback *cb,
1511 int *t_p, int s_t)
1512{
1513 struct qdisc_dump_args arg;
1514
1515 if (tc_qdisc_dump_ignore(q) ||
1516 *t_p < s_t || !q->ops->cl_ops ||
1517 (tcm->tcm_parent &&
1518 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1519 (*t_p)++;
1520 return 0;
1521 }
1522 if (*t_p > s_t)
1523 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1524 arg.w.fn = qdisc_class_dump;
1525 arg.skb = skb;
1526 arg.cb = cb;
1527 arg.w.stop = 0;
1528 arg.w.skip = cb->args[1];
1529 arg.w.count = 0;
1530 q->ops->cl_ops->walk(q, &arg.w);
1531 cb->args[1] = arg.w.count;
1532 if (arg.w.stop)
1533 return -1;
1534 (*t_p)++;
1535 return 0;
1536}
1537
1538static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1539 struct tcmsg *tcm, struct netlink_callback *cb,
1540 int *t_p, int s_t)
1541{
1542 struct Qdisc *q;
1543
1544 if (!root)
1545 return 0;
1546
1547 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1548 return -1;
1549
1550 list_for_each_entry(q, &root->list, list) {
1551 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1552 return -1;
1553 }
1554
1555 return 0;
1556}
1557
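/* RTM_GETTCLASS dump handler: look up the device named by tcm_ifindex
 * and dump the classes of its egress root qdisc and of its ingress
 * qdisc, resuming from the qdisc count saved in cb->args[0].
 */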
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1559{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001561 struct net *net = sock_net(skb->sk);
1562 struct netdev_queue *dev_queue;
1563 struct net_device *dev;
1564 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565
Denis V. Lunevb8542722007-12-01 00:21:31 +11001566 if (net != &init_net)
1567 return 0;
1568
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1570 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001571 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 return 0;
1573
1574 s_t = cb->args[0];
1575 t = 0;
1576
David S. Miller30723672008-07-18 22:50:15 -07001577 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller8123b422008-08-08 23:23:39 -07001578 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001579 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
David S. Miller30723672008-07-18 22:50:15 -07001581 dev_queue = &dev->rx_queue;
David S. Miller8123b422008-08-08 23:23:39 -07001582 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001583 goto done;
1584
1585done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 cb->args[0] = t;
1587
1588 dev_put(dev);
1589 return skb->len;
1590}
1591
1592/* Main classifier routine: scans classifier chain attached
1593 to this qdisc, (optionally) tests for protocol and asks
1594 specific classifiers.
1595 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001596int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1597 struct tcf_result *res)
1598{
1599 __be16 protocol = skb->protocol;
1600 int err = 0;
1601
1602 for (; tp; tp = tp->next) {
1603 if ((tp->protocol == protocol ||
1604 tp->protocol == htons(ETH_P_ALL)) &&
1605 (err = tp->classify(skb, tp, res)) >= 0) {
1606#ifdef CONFIG_NET_CLS_ACT
1607 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1608 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1609#endif
1610 return err;
1611 }
1612 }
1613 return -1;
1614}
1615EXPORT_SYMBOL(tc_classify_compat);
1616
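/* Like tc_classify_compat(), but with CONFIG_NET_CLS_ACT it honours
 * TC_ACT_RECLASSIFY: classification restarts from the head of the
 * chain, and TC_ACT_SHOT is returned after MAX_REC_LOOP rounds to
 * break reclassification loops.
 */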
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001618 struct tcf_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619{
1620 int err = 0;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001621 __be16 protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622#ifdef CONFIG_NET_CLS_ACT
1623 struct tcf_proto *otp = tp;
1624reclassify:
1625#endif
1626 protocol = skb->protocol;
1627
Patrick McHardy73ca4912007-07-15 00:02:31 -07001628 err = tc_classify_compat(skb, tp, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy73ca4912007-07-15 00:02:31 -07001630 if (err == TC_ACT_RECLASSIFY) {
1631 u32 verd = G_TC_VERD(skb->tc_verd);
1632 tp = otp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633
Patrick McHardy73ca4912007-07-15 00:02:31 -07001634 if (verd++ >= MAX_REC_LOOP) {
 1635 printk(KERN_ERR "rule prio %u protocol %02x reclassify loop, "
1636 "packet dropped\n",
1637 tp->prio&0xffff, ntohs(tp->protocol));
1638 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001640 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
1641 goto reclassify;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 }
Patrick McHardy73ca4912007-07-15 00:02:31 -07001643#endif
1644 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645}
Patrick McHardy73ca4912007-07-15 00:02:31 -07001646EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647
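/* Free one classifier: let it tear down its private state, then drop
 * the module reference and the tcf_proto itself.
 */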
Patrick McHardya48b5a62007-03-23 11:29:43 -07001648void tcf_destroy(struct tcf_proto *tp)
1649{
1650 tp->ops->destroy(tp);
1651 module_put(tp->ops->owner);
1652 kfree(tp);
1653}
1654
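/* Unlink and destroy every classifier on the chain headed by *fl,
 * leaving *fl NULL.
 */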
Patrick McHardyff31ab52008-07-01 19:52:38 -07001655void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001656{
1657 struct tcf_proto *tp;
1658
Patrick McHardyff31ab52008-07-01 19:52:38 -07001659 while ((tp = *fl) != NULL) {
1660 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001661 tcf_destroy(tp);
1662 }
1663}
1664EXPORT_SYMBOL(tcf_destroy_chain);
1665
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666#ifdef CONFIG_PROC_FS
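/* /proc/net/psched: expose the scheduler clock parameters (tick/time
 * conversion factors and the hrtimer resolution) that userspace tools
 * such as tc read to convert between scheduler ticks and real time.
 */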
1667static int psched_show(struct seq_file *seq, void *v)
1668{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001669 struct timespec ts;
1670
1671 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 seq_printf(seq, "%08x %08x %08x %08x\n",
Patrick McHardy641b9e02007-03-16 01:18:42 -07001673 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001674 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001675 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
1677 return 0;
1678}
1679
1680static int psched_open(struct inode *inode, struct file *file)
1681{
1682 return single_open(file, psched_show, PDE(inode)->data);
1683}
1684
Arjan van de Venda7071d2007-02-12 00:55:36 -08001685static const struct file_operations psched_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 .owner = THIS_MODULE,
1687 .open = psched_open,
1688 .read = seq_read,
1689 .llseek = seq_lseek,
1690 .release = single_release,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001691};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692#endif
1693
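/* Boot-time setup: register the built-in fifo qdiscs, create
 * /proc/net/psched and wire up the qdisc/class rtnetlink handlers.
 */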
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694static int __init pktsched_init(void)
1695{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 register_qdisc(&pfifo_qdisc_ops);
1697 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001698 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699
Thomas Grafbe577dd2007-03-22 11:55:50 -07001700 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1701 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1702 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1703 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1704 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1705 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1706
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 return 0;
1708}
1709
1710subsys_initcall(pktsched_init);