blob: dc89a9343f30c100c82f773bead6148549c86a74 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020031#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110032#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070033#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <net/pkt_sched.h>
35
Linus Torvalds1da177e2005-04-16 15:20:36 -070036static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
37 struct Qdisc *old, struct Qdisc *new);
38static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
39 struct Qdisc *q, unsigned long cl, int event);
40
41/*
42
43 Short review.
44 -------------
45
46 This file consists of two interrelated parts:
47
48 1. queueing disciplines manager frontend.
49 2. traffic classes manager frontend.
50
   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the
   times determined by the algorithm hidden inside it.
55
   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (look at cls_api.c)
60
61 In turn, classes may have child qdiscs (as rule, queues)
62 attached to them etc. etc. etc.
63
   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.
69
70 All real intelligent work is done inside qdisc modules.
71
72
73
74 Every discipline has two major routines: enqueue and dequeue.
75
76 ---dequeue
77
   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not a
   real packet queue, but q->q.qlen must nevertheless be valid.
84
85 ---enqueue
86
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.
96
97 Auxiliary routines:
98
99 ---requeue
100
   requeues a packet that was already dequeued. It is used for non-standard
   or just buggy devices, which can defer output even if dev->tbusy=0.
103
104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
/* Guards the list of registered TC modules below. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/*
 * Head of the singly linked list (chained through Qdisc_ops::next) of all
 * installed queueing disciplines.
 */
static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->requeue == NULL)
150 qops->requeue = noop_qdisc_ops.requeue;
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 qops->next = NULL;
155 *qp = qops;
156 rc = 0;
157out:
158 write_unlock(&qdisc_mod_lock);
159 return rc;
160}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800161EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
163int unregister_qdisc(struct Qdisc_ops *qops)
164{
165 struct Qdisc_ops *q, **qp;
166 int err = -ENOENT;
167
168 write_lock(&qdisc_mod_lock);
169 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
170 if (q == qops)
171 break;
172 if (q) {
173 *qp = q->next;
174 q->next = NULL;
175 err = 0;
176 }
177 write_unlock(&qdisc_mod_lock);
178 return err;
179}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800180EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
182/* We know handle. Find qdisc among all qdisc's attached to device
183 (root qdisc, all its children, children of children etc.)
184 */
185
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700186struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800187{
188 struct Qdisc *q;
189
190 list_for_each_entry(q, &dev->qdisc_list, list) {
191 if (q->handle == handle)
192 return q;
193 }
194 return NULL;
195}
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
198{
199 unsigned long cl;
200 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800201 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
203 if (cops == NULL)
204 return NULL;
205 cl = cops->get(p, classid);
206
207 if (cl == 0)
208 return NULL;
209 leaf = cops->leaf(p, cl);
210 cops->put(p, cl);
211 return leaf;
212}
213
214/* Find queueing discipline by name */
215
216static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
217{
218 struct Qdisc_ops *q = NULL;
219
220 if (kind) {
221 read_lock(&qdisc_mod_lock);
222 for (q = qdisc_base; q; q = q->next) {
223 if (rtattr_strcmp(kind, q->id) == 0) {
224 if (!try_module_get(q->owner))
225 q = NULL;
226 break;
227 }
228 }
229 read_unlock(&qdisc_mod_lock);
230 }
231 return q;
232}
233
234static struct qdisc_rate_table *qdisc_rtab_list;
235
236struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
237{
238 struct qdisc_rate_table *rtab;
239
240 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
241 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
242 rtab->refcnt++;
243 return rtab;
244 }
245 }
246
247 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
248 return NULL;
249
250 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
251 if (rtab) {
252 rtab->rate = *r;
253 rtab->refcnt = 1;
254 memcpy(rtab->data, RTA_DATA(tab), 1024);
255 rtab->next = qdisc_rtab_list;
256 qdisc_rtab_list = rtab;
257 }
258 return rtab;
259}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800260EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262void qdisc_put_rtab(struct qdisc_rate_table *tab)
263{
264 struct qdisc_rate_table *rtab, **rtabp;
265
266 if (!tab || --tab->refcnt)
267 return;
268
269 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
270 if (rtab == tab) {
271 *rtabp = rtab->next;
272 kfree(rtab);
273 return;
274 }
275 }
276}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800277EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
Patrick McHardy41794772007-03-16 01:19:15 -0700279static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
280{
281 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
282 timer);
Stephen Hemminger19365022007-03-22 12:18:35 -0700283 struct net_device *dev = wd->qdisc->dev;
Patrick McHardy41794772007-03-16 01:19:15 -0700284
285 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
Stephen Hemminger11274e52007-03-22 12:17:42 -0700286 smp_wmb();
Patrick McHardy0621ed22007-07-14 20:49:26 -0700287 netif_schedule(dev);
Stephen Hemminger19365022007-03-22 12:18:35 -0700288
Patrick McHardy41794772007-03-16 01:19:15 -0700289 return HRTIMER_NORESTART;
290}
291
292void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
293{
294 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
295 wd->timer.function = qdisc_watchdog;
296 wd->qdisc = qdisc;
297}
298EXPORT_SYMBOL(qdisc_watchdog_init);
299
300void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
301{
302 ktime_t time;
303
304 wd->qdisc->flags |= TCQ_F_THROTTLED;
305 time = ktime_set(0, 0);
306 time = ktime_add_ns(time, PSCHED_US2NS(expires));
307 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
308}
309EXPORT_SYMBOL(qdisc_watchdog_schedule);
310
311void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
312{
313 hrtimer_cancel(&wd->timer);
314 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
315}
316EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317
318/* Allocate an unique handle from space managed by kernel */
319
320static u32 qdisc_alloc_handle(struct net_device *dev)
321{
322 int i = 0x10000;
323 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
324
325 do {
326 autohandle += TC_H_MAKE(0x10000U, 0);
327 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
328 autohandle = TC_H_MAKE(0x80000000U, 0);
329 } while (qdisc_lookup(dev, autohandle) && --i > 0);
330
331 return i>0 ? autohandle : 0;
332}
333
334/* Attach toplevel qdisc to device dev */
335
336static struct Qdisc *
337dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
338{
339 struct Qdisc *oqdisc;
340
341 if (dev->flags & IFF_UP)
342 dev_deactivate(dev);
343
344 qdisc_lock_tree(dev);
345 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
346 oqdisc = dev->qdisc_ingress;
347 /* Prune old scheduler */
348 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
349 /* delete */
350 qdisc_reset(oqdisc);
351 dev->qdisc_ingress = NULL;
352 } else { /* new */
353 dev->qdisc_ingress = qdisc;
354 }
355
356 } else {
357
358 oqdisc = dev->qdisc_sleeping;
359
360 /* Prune old scheduler */
361 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
362 qdisc_reset(oqdisc);
363
364 /* ... and graft new one */
365 if (qdisc == NULL)
366 qdisc = &noop_qdisc;
367 dev->qdisc_sleeping = qdisc;
368 dev->qdisc = &noop_qdisc;
369 }
370
371 qdisc_unlock_tree(dev);
372
373 if (dev->flags & IFF_UP)
374 dev_activate(dev);
375
376 return oqdisc;
377}
378
Patrick McHardy43effa12006-11-29 17:35:48 -0800379void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
380{
Eric Dumazet20fea082007-11-14 01:44:41 -0800381 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800382 unsigned long cl;
383 u32 parentid;
384
385 if (n == 0)
386 return;
387 while ((parentid = sch->parent)) {
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700388 sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700389 if (sch == NULL) {
390 WARN_ON(parentid != TC_H_ROOT);
391 return;
392 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800393 cops = sch->ops->cl_ops;
394 if (cops->qlen_notify) {
395 cl = cops->get(sch, parentid);
396 cops->qlen_notify(sch, cl);
397 cops->put(sch, cl);
398 }
399 sch->q.qlen -= n;
400 }
401}
402EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403
404/* Graft qdisc "new" to class "classid" of qdisc "parent" or
405 to device "dev".
406
407 Old qdisc is not destroyed but returned in *old.
408 */
409
410static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
411 u32 classid,
412 struct Qdisc *new, struct Qdisc **old)
413{
414 int err = 0;
415 struct Qdisc *q = *old;
416
417
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900418 if (parent == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 if (q && q->flags&TCQ_F_INGRESS) {
420 *old = dev_graft_qdisc(dev, q);
421 } else {
422 *old = dev_graft_qdisc(dev, new);
423 }
424 } else {
Eric Dumazet20fea082007-11-14 01:44:41 -0800425 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
427 err = -EINVAL;
428
429 if (cops) {
430 unsigned long cl = cops->get(parent, classid);
431 if (cl) {
432 err = cops->graft(parent, cl, new, old);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 cops->put(parent, cl);
434 }
435 }
436 }
437 return err;
438}
439
440/*
441 Allocate and initialize new qdisc.
442
443 Parameters are passed via opt.
444 */
445
446static struct Qdisc *
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700447qdisc_create(struct net_device *dev, u32 parent, u32 handle,
448 struct rtattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449{
450 int err;
451 struct rtattr *kind = tca[TCA_KIND-1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 struct Qdisc *sch;
453 struct Qdisc_ops *ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
455 ops = qdisc_lookup_ops(kind);
456#ifdef CONFIG_KMOD
457 if (ops == NULL && kind != NULL) {
458 char name[IFNAMSIZ];
459 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
460 /* We dropped the RTNL semaphore in order to
461 * perform the module load. So, even if we
462 * succeeded in loading the module we have to
463 * tell the caller to replay the request. We
464 * indicate this using -EAGAIN.
465 * We replay the request because the device may
466 * go away in the mean time.
467 */
468 rtnl_unlock();
469 request_module("sch_%s", name);
470 rtnl_lock();
471 ops = qdisc_lookup_ops(kind);
472 if (ops != NULL) {
473 /* We will try again qdisc_lookup_ops,
474 * so don't keep a reference.
475 */
476 module_put(ops->owner);
477 err = -EAGAIN;
478 goto err_out;
479 }
480 }
481 }
482#endif
483
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700484 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 if (ops == NULL)
486 goto err_out;
487
Thomas Graf3d54b822005-07-05 14:15:09 -0700488 sch = qdisc_alloc(dev, ops);
489 if (IS_ERR(sch)) {
490 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700492 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700494 sch->parent = parent;
495
Thomas Graf3d54b822005-07-05 14:15:09 -0700496 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 sch->flags |= TCQ_F_INGRESS;
Patrick McHardyfd44de72007-04-16 17:07:08 -0700498 sch->stats_lock = &dev->ingress_lock;
Thomas Graf3d54b822005-07-05 14:15:09 -0700499 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700500 } else {
501 sch->stats_lock = &dev->queue_lock;
502 if (handle == 0) {
503 handle = qdisc_alloc_handle(dev);
504 err = -ENOMEM;
505 if (handle == 0)
506 goto err_out3;
507 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 }
509
Thomas Graf3d54b822005-07-05 14:15:09 -0700510 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
512 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
Thomas Graf023e09a2005-07-05 14:15:53 -0700513 if (tca[TCA_RATE-1]) {
514 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
515 sch->stats_lock,
516 tca[TCA_RATE-1]);
517 if (err) {
518 /*
519 * Any broken qdiscs that would require
520 * a ops->reset() here? The qdisc was never
521 * in action so it shouldn't be necessary.
522 */
523 if (ops->destroy)
524 ops->destroy(sch);
525 goto err_out3;
526 }
527 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 qdisc_lock_tree(dev);
529 list_add_tail(&sch->list, &dev->qdisc_list);
530 qdisc_unlock_tree(dev);
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 return sch;
533 }
534err_out3:
535 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700536 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537err_out2:
538 module_put(ops->owner);
539err_out:
540 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 return NULL;
542}
543
544static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
545{
546 if (tca[TCA_OPTIONS-1]) {
547 int err;
548
549 if (sch->ops->change == NULL)
550 return -EINVAL;
551 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
552 if (err)
553 return err;
554 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 if (tca[TCA_RATE-1])
556 gen_replace_estimator(&sch->bstats, &sch->rate_est,
557 sch->stats_lock, tca[TCA_RATE-1]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 return 0;
559}
560
561struct check_loop_arg
562{
563 struct qdisc_walker w;
564 struct Qdisc *p;
565 int depth;
566};
567
568static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
569
570static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
571{
572 struct check_loop_arg arg;
573
574 if (q->ops->cl_ops == NULL)
575 return 0;
576
577 arg.w.stop = arg.w.skip = arg.w.count = 0;
578 arg.w.fn = check_loop_fn;
579 arg.depth = depth;
580 arg.p = p;
581 q->ops->cl_ops->walk(q, &arg.w);
582 return arg.w.stop ? -ELOOP : 0;
583}
584
585static int
586check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
587{
588 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800589 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 struct check_loop_arg *arg = (struct check_loop_arg *)w;
591
592 leaf = cops->leaf(q, cl);
593 if (leaf) {
594 if (leaf == arg->p || arg->depth > 7)
595 return -ELOOP;
596 return check_loop(leaf, arg->p, arg->depth + 1);
597 }
598 return 0;
599}
600
601/*
602 * Delete/get qdisc.
603 */
604
605static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
606{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100607 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 struct tcmsg *tcm = NLMSG_DATA(n);
609 struct rtattr **tca = arg;
610 struct net_device *dev;
611 u32 clid = tcm->tcm_parent;
612 struct Qdisc *q = NULL;
613 struct Qdisc *p = NULL;
614 int err;
615
Denis V. Lunevb8542722007-12-01 00:21:31 +1100616 if (net != &init_net)
617 return -EINVAL;
618
Eric W. Biederman881d9662007-09-17 11:56:21 -0700619 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 return -ENODEV;
621
622 if (clid) {
623 if (clid != TC_H_ROOT) {
624 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
625 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
626 return -ENOENT;
627 q = qdisc_leaf(p, clid);
628 } else { /* ingress */
629 q = dev->qdisc_ingress;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900630 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 } else {
632 q = dev->qdisc_sleeping;
633 }
634 if (!q)
635 return -ENOENT;
636
637 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
638 return -EINVAL;
639 } else {
640 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
641 return -ENOENT;
642 }
643
644 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
645 return -EINVAL;
646
647 if (n->nlmsg_type == RTM_DELQDISC) {
648 if (!clid)
649 return -EINVAL;
650 if (q->handle == 0)
651 return -ENOENT;
652 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
653 return err;
654 if (q) {
655 qdisc_notify(skb, n, clid, q, NULL);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700656 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 qdisc_destroy(q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700658 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 }
660 } else {
661 qdisc_notify(skb, n, clid, NULL, q);
662 }
663 return 0;
664}
665
666/*
667 Create/change qdisc.
668 */
669
670static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
671{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100672 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 struct tcmsg *tcm;
674 struct rtattr **tca;
675 struct net_device *dev;
676 u32 clid;
677 struct Qdisc *q, *p;
678 int err;
679
Denis V. Lunevb8542722007-12-01 00:21:31 +1100680 if (net != &init_net)
681 return -EINVAL;
682
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683replay:
684 /* Reinit, just in case something touches this. */
685 tcm = NLMSG_DATA(n);
686 tca = arg;
687 clid = tcm->tcm_parent;
688 q = p = NULL;
689
Eric W. Biederman881d9662007-09-17 11:56:21 -0700690 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 return -ENODEV;
692
693 if (clid) {
694 if (clid != TC_H_ROOT) {
695 if (clid != TC_H_INGRESS) {
696 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
697 return -ENOENT;
698 q = qdisc_leaf(p, clid);
699 } else { /*ingress */
700 q = dev->qdisc_ingress;
701 }
702 } else {
703 q = dev->qdisc_sleeping;
704 }
705
706 /* It may be default qdisc, ignore it */
707 if (q && q->handle == 0)
708 q = NULL;
709
710 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
711 if (tcm->tcm_handle) {
712 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
713 return -EEXIST;
714 if (TC_H_MIN(tcm->tcm_handle))
715 return -EINVAL;
716 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
717 goto create_n_graft;
718 if (n->nlmsg_flags&NLM_F_EXCL)
719 return -EEXIST;
720 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
721 return -EINVAL;
722 if (q == p ||
723 (p && check_loop(q, p, 0)))
724 return -ELOOP;
725 atomic_inc(&q->refcnt);
726 goto graft;
727 } else {
728 if (q == NULL)
729 goto create_n_graft;
730
731 /* This magic test requires explanation.
732 *
733 * We know, that some child q is already
734 * attached to this parent and have choice:
735 * either to change it or to create/graft new one.
736 *
737 * 1. We are allowed to create/graft only
738 * if CREATE and REPLACE flags are set.
739 *
740 * 2. If EXCL is set, requestor wanted to say,
741 * that qdisc tcm_handle is not expected
742 * to exist, so that we choose create/graft too.
743 *
744 * 3. The last case is when no flags are set.
745 * Alas, it is sort of hole in API, we
746 * cannot decide what to do unambiguously.
747 * For now we select create/graft, if
748 * user gave KIND, which does not match existing.
749 */
750 if ((n->nlmsg_flags&NLM_F_CREATE) &&
751 (n->nlmsg_flags&NLM_F_REPLACE) &&
752 ((n->nlmsg_flags&NLM_F_EXCL) ||
753 (tca[TCA_KIND-1] &&
754 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
755 goto create_n_graft;
756 }
757 }
758 } else {
759 if (!tcm->tcm_handle)
760 return -EINVAL;
761 q = qdisc_lookup(dev, tcm->tcm_handle);
762 }
763
764 /* Change qdisc parameters */
765 if (q == NULL)
766 return -ENOENT;
767 if (n->nlmsg_flags&NLM_F_EXCL)
768 return -EEXIST;
769 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
770 return -EINVAL;
771 err = qdisc_change(q, tca);
772 if (err == 0)
773 qdisc_notify(skb, n, clid, NULL, q);
774 return err;
775
776create_n_graft:
777 if (!(n->nlmsg_flags&NLM_F_CREATE))
778 return -ENOENT;
779 if (clid == TC_H_INGRESS)
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700780 q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
781 tca, &err);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900782 else
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700783 q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
784 tca, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 if (q == NULL) {
786 if (err == -EAGAIN)
787 goto replay;
788 return err;
789 }
790
791graft:
792 if (1) {
793 struct Qdisc *old_q = NULL;
794 err = qdisc_graft(dev, p, clid, q, &old_q);
795 if (err) {
796 if (q) {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700797 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 qdisc_destroy(q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700799 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 }
801 return err;
802 }
803 qdisc_notify(skb, n, clid, old_q, q);
804 if (old_q) {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700805 qdisc_lock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 qdisc_destroy(old_q);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700807 qdisc_unlock_tree(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 }
809 }
810 return 0;
811}
812
813static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -0700814 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815{
816 struct tcmsg *tcm;
817 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700818 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 struct gnet_dump d;
820
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -0700821 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 tcm = NLMSG_DATA(nlh);
823 tcm->tcm_family = AF_UNSPEC;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700824 tcm->tcm__pad1 = 0;
825 tcm->tcm__pad2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 tcm->tcm_ifindex = q->dev->ifindex;
827 tcm->tcm_parent = clid;
828 tcm->tcm_handle = q->handle;
829 tcm->tcm_info = atomic_read(&q->refcnt);
830 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
831 if (q->ops->dump && q->ops->dump(q, skb) < 0)
832 goto rtattr_failure;
833 q->qstats.qlen = q->q.qlen;
834
835 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
836 TCA_XSTATS, q->stats_lock, &d) < 0)
837 goto rtattr_failure;
838
839 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
840 goto rtattr_failure;
841
842 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 gnet_stats_copy_queue(&d, &q->qstats) < 0)
845 goto rtattr_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900846
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 if (gnet_stats_finish_copy(&d) < 0)
848 goto rtattr_failure;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900849
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700850 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 return skb->len;
852
853nlmsg_failure:
854rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -0700855 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 return -1;
857}
858
859static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
860 u32 clid, struct Qdisc *old, struct Qdisc *new)
861{
862 struct sk_buff *skb;
863 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
864
865 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
866 if (!skb)
867 return -ENOBUFS;
868
869 if (old && old->handle) {
870 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
871 goto err_out;
872 }
873 if (new) {
874 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
875 goto err_out;
876 }
877
878 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800879 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
881err_out:
882 kfree_skb(skb);
883 return -EINVAL;
884}
885
886static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
887{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100888 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 int idx, q_idx;
890 int s_idx, s_q_idx;
891 struct net_device *dev;
892 struct Qdisc *q;
893
Denis V. Lunevb8542722007-12-01 00:21:31 +1100894 if (net != &init_net)
895 return 0;
896
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 s_idx = cb->args[0];
898 s_q_idx = q_idx = cb->args[1];
899 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -0700900 idx = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700901 for_each_netdev(&init_net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -0700903 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 if (idx > s_idx)
905 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 q_idx = 0;
907 list_for_each_entry(q, &dev->qdisc_list, list) {
908 if (q_idx < s_q_idx) {
909 q_idx++;
910 continue;
911 }
912 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
Patrick McHardy0463d4a2007-04-16 17:02:10 -0700913 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 q_idx++;
916 }
Pavel Emelianov7562f872007-05-03 15:13:45 -0700917cont:
918 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 }
920
921done:
922 read_unlock(&dev_base_lock);
923
924 cb->args[0] = idx;
925 cb->args[1] = q_idx;
926
927 return skb->len;
928}
929
930
931
932/************************************************
933 * Traffic classes manipulation. *
934 ************************************************/
935
936
937
938static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
939{
Denis V. Lunevb8542722007-12-01 00:21:31 +1100940 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 struct tcmsg *tcm = NLMSG_DATA(n);
942 struct rtattr **tca = arg;
943 struct net_device *dev;
944 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -0800945 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 unsigned long cl = 0;
947 unsigned long new_cl;
948 u32 pid = tcm->tcm_parent;
949 u32 clid = tcm->tcm_handle;
950 u32 qid = TC_H_MAJ(clid);
951 int err;
952
Denis V. Lunevb8542722007-12-01 00:21:31 +1100953 if (net != &init_net)
954 return -EINVAL;
955
Eric W. Biederman881d9662007-09-17 11:56:21 -0700956 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 return -ENODEV;
958
959 /*
960 parent == TC_H_UNSPEC - unspecified parent.
961 parent == TC_H_ROOT - class is root, which has no parent.
962 parent == X:0 - parent is root class.
963 parent == X:Y - parent is a node in hierarchy.
964 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
965
966 handle == 0:0 - generate handle from kernel pool.
967 handle == 0:Y - class is X:Y, where X:0 is qdisc.
968 handle == X:Y - clear.
969 handle == X:0 - root class.
970 */
971
972 /* Step 1. Determine qdisc handle X:0 */
973
974 if (pid != TC_H_ROOT) {
975 u32 qid1 = TC_H_MAJ(pid);
976
977 if (qid && qid1) {
978 /* If both majors are known, they must be identical. */
979 if (qid != qid1)
980 return -EINVAL;
981 } else if (qid1) {
982 qid = qid1;
983 } else if (qid == 0)
984 qid = dev->qdisc_sleeping->handle;
985
986 /* Now qid is genuine qdisc handle consistent
987 both with parent and child.
988
989 TC_H_MAJ(pid) still may be unspecified, complete it now.
990 */
991 if (pid)
992 pid = TC_H_MAKE(qid, pid);
993 } else {
994 if (qid == 0)
995 qid = dev->qdisc_sleeping->handle;
996 }
997
998 /* OK. Locate qdisc */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900999 if ((q = qdisc_lookup(dev, qid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 return -ENOENT;
1001
1002 /* An check that it supports classes */
1003 cops = q->ops->cl_ops;
1004 if (cops == NULL)
1005 return -EINVAL;
1006
1007 /* Now try to get class */
1008 if (clid == 0) {
1009 if (pid == TC_H_ROOT)
1010 clid = qid;
1011 } else
1012 clid = TC_H_MAKE(qid, clid);
1013
1014 if (clid)
1015 cl = cops->get(q, clid);
1016
1017 if (cl == 0) {
1018 err = -ENOENT;
1019 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1020 goto out;
1021 } else {
1022 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001023 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 err = -EEXIST;
1025 if (n->nlmsg_flags&NLM_F_EXCL)
1026 goto out;
1027 break;
1028 case RTM_DELTCLASS:
1029 err = cops->delete(q, cl);
1030 if (err == 0)
1031 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1032 goto out;
1033 case RTM_GETTCLASS:
1034 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1035 goto out;
1036 default:
1037 err = -EINVAL;
1038 goto out;
1039 }
1040 }
1041
1042 new_cl = cl;
1043 err = cops->change(q, clid, pid, tca, &new_cl);
1044 if (err == 0)
1045 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1046
1047out:
1048 if (cl)
1049 cops->put(q, cl);
1050
1051 return err;
1052}
1053
1054
1055static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1056 unsigned long cl,
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001057 u32 pid, u32 seq, u16 flags, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
1059 struct tcmsg *tcm;
1060 struct nlmsghdr *nlh;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001061 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 struct gnet_dump d;
Eric Dumazet20fea082007-11-14 01:44:41 -08001063 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064
Jamal Hadi Salime431b8c2005-06-18 22:55:31 -07001065 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 tcm = NLMSG_DATA(nlh);
1067 tcm->tcm_family = AF_UNSPEC;
1068 tcm->tcm_ifindex = q->dev->ifindex;
1069 tcm->tcm_parent = q->handle;
1070 tcm->tcm_handle = q->handle;
1071 tcm->tcm_info = 0;
1072 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
1073 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1074 goto rtattr_failure;
1075
1076 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1077 TCA_XSTATS, q->stats_lock, &d) < 0)
1078 goto rtattr_failure;
1079
1080 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1081 goto rtattr_failure;
1082
1083 if (gnet_stats_finish_copy(&d) < 0)
1084 goto rtattr_failure;
1085
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001086 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 return skb->len;
1088
1089nlmsg_failure:
1090rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001091 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 return -1;
1093}
1094
1095static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1096 struct Qdisc *q, unsigned long cl, int event)
1097{
1098 struct sk_buff *skb;
1099 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1100
1101 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1102 if (!skb)
1103 return -ENOBUFS;
1104
1105 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1106 kfree_skb(skb);
1107 return -EINVAL;
1108 }
1109
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001110 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111}
1112
1113struct qdisc_dump_args
1114{
1115 struct qdisc_walker w;
1116 struct sk_buff *skb;
1117 struct netlink_callback *cb;
1118};
1119
1120static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1121{
1122 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1123
1124 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1125 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1126}
1127
1128static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1129{
Denis V. Lunevb8542722007-12-01 00:21:31 +11001130 struct net *net = skb->sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 int t;
1132 int s_t;
1133 struct net_device *dev;
1134 struct Qdisc *q;
1135 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1136 struct qdisc_dump_args arg;
1137
Denis V. Lunevb8542722007-12-01 00:21:31 +11001138 if (net != &init_net)
1139 return 0;
1140
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1142 return 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001143 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 return 0;
1145
1146 s_t = cb->args[0];
1147 t = 0;
1148
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 list_for_each_entry(q, &dev->qdisc_list, list) {
1150 if (t < s_t || !q->ops->cl_ops ||
1151 (tcm->tcm_parent &&
1152 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1153 t++;
1154 continue;
1155 }
1156 if (t > s_t)
1157 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1158 arg.w.fn = qdisc_class_dump;
1159 arg.skb = skb;
1160 arg.cb = cb;
1161 arg.w.stop = 0;
1162 arg.w.skip = cb->args[1];
1163 arg.w.count = 0;
1164 q->ops->cl_ops->walk(q, &arg.w);
1165 cb->args[1] = arg.w.count;
1166 if (arg.w.stop)
1167 break;
1168 t++;
1169 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170
1171 cb->args[0] = t;
1172
1173 dev_put(dev);
1174 return skb->len;
1175}
1176
1177/* Main classifier routine: scans classifier chain attached
1178 to this qdisc, (optionally) tests for protocol and asks
1179 specific classifiers.
1180 */
Patrick McHardy73ca4912007-07-15 00:02:31 -07001181int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1182 struct tcf_result *res)
1183{
1184 __be16 protocol = skb->protocol;
1185 int err = 0;
1186
1187 for (; tp; tp = tp->next) {
1188 if ((tp->protocol == protocol ||
1189 tp->protocol == htons(ETH_P_ALL)) &&
1190 (err = tp->classify(skb, tp, res)) >= 0) {
1191#ifdef CONFIG_NET_CLS_ACT
1192 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1193 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1194#endif
1195 return err;
1196 }
1197 }
1198 return -1;
1199}
1200EXPORT_SYMBOL(tc_classify_compat);
1201
/*
 * Classify @skb against the chain starting at @tp.
 *
 * Thin wrapper around tc_classify_compat().  With CONFIG_NET_CLS_ACT, a
 * TC_ACT_RECLASSIFY verdict restarts classification from the head of the
 * chain; the restart count is kept in skb->tc_verd and once it reaches
 * MAX_REC_LOOP a message is logged and TC_ACT_SHOT is returned.
 *
 * Returns the verdict from tc_classify_compat(), or TC_ACT_SHOT on a
 * reclassify loop.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		/* Compare the old count, then bump it for the next round. */
		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Patrick McHardya48b5a62007-03-23 11:29:43 -07001233void tcf_destroy(struct tcf_proto *tp)
1234{
1235 tp->ops->destroy(tp);
1236 module_put(tp->ops->owner);
1237 kfree(tp);
1238}
1239
1240void tcf_destroy_chain(struct tcf_proto *fl)
1241{
1242 struct tcf_proto *tp;
1243
1244 while ((tp = fl) != NULL) {
1245 fl = tp->next;
1246 tcf_destroy(tp);
1247 }
1248}
1249EXPORT_SYMBOL(tcf_destroy_chain);
1250
#ifdef CONFIG_PROC_FS
/*
 * seq_file show routine for the "psched" proc entry.  Emits one line of
 * four 8-digit hex words: NSEC_PER_USEC, PSCHED_US2NS(1), the constant
 * 1000000, and NSEC_PER_SEC divided by the CLOCK_MONOTONIC hrtimer
 * resolution in nanoseconds.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;
	u32 res_ns;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	res_ns = (u32)ktime_to_ns(timespec_to_ktime(ts));

	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC,
		   (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / res_ns);

	return 0;
}

/* open() for the proc entry: a single-record seq_file using psched_show(). */
static int psched_open(struct inode *inode, struct file *file)
{
	void *data = PDE(inode)->data;

	return single_open(file, psched_show, data);
}

/* File operations for the "psched" proc entry; reads go through seq_file. */
static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
1278
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279static int __init pktsched_init(void)
1280{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 register_qdisc(&pfifo_qdisc_ops);
1282 register_qdisc(&bfifo_qdisc_ops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001283 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
Thomas Grafbe577dd2007-03-22 11:55:50 -07001285 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1286 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1287 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1288 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1289 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1290 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1291
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 return 0;
1293}
1294
1295subsys_initcall(pktsched_init);