/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *		Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *				Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *				- Ingress support
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * dev->queue_lock spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via top level device
 *   spinlock dev->queue_lock.
 * - ingress filtering is serialized via top level device
 *   spinlock dev->ingress_lock.
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

void qdisc_lock_tree(struct net_device *dev)
{
	spin_lock_bh(&dev->queue_lock);
	spin_lock(&dev->ingress_lock);
}

void qdisc_unlock_tree(struct net_device *dev)
{
	spin_unlock(&dev->ingress_lock);
	spin_unlock_bh(&dev->queue_lock);
}
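
/* Typical usage (illustrative sketch, not a new API): configuration paths
 * that replace or rewire qdiscs take both locks around the change, roughly
 *
 *	qdisc_lock_tree(dev);
 *	... swap dev->qdisc / dev->qdisc_sleeping, touch dev->qdisc_list ...
 *	qdisc_unlock_tree(dev);
 *
 * so that neither the egress path (queue_lock) nor ingress filtering
 * (ingress_lock) can observe a half-updated tree.
 */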

/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   netif_tx_lock serializes accesses to the device driver.

   dev->queue_lock and netif_tx_lock are mutually exclusive:
   if one is grabbed, the other must be free.
 */


/* Kick the device: dequeue one packet and hand it to the driver.
   Note that this procedure can be called by a watchdog timer, so
   we do not check device state here.

   Returns:  0 - queue is empty.
            >0 - queue is not empty; the packet was requeued or the
                 qdisc is throttled.
            <0 - a packet was handed to the driver (or dropped); the
                 caller should try again.

   NOTE: Called under dev->queue_lock with locally disabled BH.
*/

static inline int qdisc_restart(struct net_device *dev)
{
	struct Qdisc *q = dev->qdisc;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
		unsigned nolock = (dev->features & NETIF_F_LLTX);

		dev->gso_skb = NULL;

		/*
		 * When the driver has LLTX set it does its own locking
		 * in start_xmit. No need to add additional overhead by
		 * locking again. These checks are worth it because
		 * even uncongested locks can be quite expensive.
		 * The driver can do trylock like here too, in case
		 * of lock congestion it should return -1 and the packet
		 * will be requeued.
		 */
		if (!nolock) {
			if (!netif_tx_trylock(dev)) {
			collision:
				/* So, someone grabbed the driver. */

				/* It may be a transient configuration error,
				   when hard_start_xmit() recurses. We detect
				   it by checking the xmit owner and drop the
				   packet when a deadloop is detected.
				 */
				if (dev->xmit_lock_owner == smp_processor_id()) {
					kfree_skb(skb);
					if (net_ratelimit())
						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
					return -1;
				}
				__get_cpu_var(netdev_rx_stat).cpu_collision++;
				goto requeue;
			}
		}

		{
			/* And release queue */
			spin_unlock(&dev->queue_lock);

			if (!netif_queue_stopped(dev)) {
				int ret;

				ret = dev_hard_start_xmit(skb, dev);
				if (ret == NETDEV_TX_OK) {
					if (!nolock) {
						netif_tx_unlock(dev);
					}
					spin_lock(&dev->queue_lock);
					return -1;
				}
				if (ret == NETDEV_TX_LOCKED && nolock) {
					spin_lock(&dev->queue_lock);
					goto collision;
				}
			}

			/* NETDEV_TX_BUSY - we need to requeue */
			/* Release the driver */
			if (!nolock) {
				netif_tx_unlock(dev);
			}
			spin_lock(&dev->queue_lock);
			q = dev->qdisc;
		}

		/* Device kicked us out :(
		   This is possible in the following cases:

		   0. driver is locked
		   1. fastroute is enabled
		   2. device cannot determine busy state
		      before start of transmission (f.e. dialout)
		   3. device is buggy (ppp)
		 */

requeue:
		if (skb->next)
			dev->gso_skb = skb;
		else
			q->ops->requeue(skb, q);
		netif_schedule(dev);
		return 1;
	}
	BUG_ON((int) q->q.qlen < 0);
	return q->q.qlen;
}

void __qdisc_run(struct net_device *dev)
{
	if (unlikely(dev->qdisc == &noop_qdisc))
		goto out;

	while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
		/* NOTHING */;

out:
	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
}
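
/* __qdisc_run() is normally entered through the qdisc_run() inline helper
 * in <net/pkt_sched.h>, which, roughly sketched, only calls in here after
 * winning the __LINK_STATE_QDISC_RUNNING bit:
 *
 *	if (!netif_queue_stopped(dev) &&
 *	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
 *		__qdisc_run(dev);
 *
 * so at most one CPU runs a given device's qdisc at a time; the bit is
 * cleared above once the loop stops making progress.
 */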

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (dev->qdisc != &noop_qdisc) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			if (netif_queue_stopped(dev) &&
			    time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) {

				printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

static void dev_watchdog_init(struct net_device *dev)
{
	init_timer(&dev->watchdog_timer);
	dev->watchdog_timer.data = (unsigned long)dev;
	dev->watchdog_timer.function = dev_watchdog;
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}
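
/* Reference counting around the watchdog timer: mod_timer() returns 0 when
 * the timer was not already pending, so each fresh arming above pairs with a
 * dev_hold(); del_timer() returns nonzero only when it removed a pending
 * timer, in which case the matching dev_put() is done in dev_watchdog_down()
 * (otherwise dev_watchdog() itself drops the reference when it fires).
 */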

void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
	if (netif_running(dev))
		__netdev_watchdog_up(dev);
}

void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
};

static struct Qdisc_ops noqueue_qdisc_ops = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
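
/* The priomap above is indexed by skb->priority & TC_PRIO_MAX and selects
 * one of the three bands below: band 0 is served first (TC_PRIO_INTERACTIVE
 * and TC_PRIO_CONTROL map there), band 1 is the best-effort default and
 * band 2 holds bulk traffic.  It is the same default priomap reported by
 * pfifo_fast_dump().
 */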

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};
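
/* pfifo_fast is the default root qdisc that dev_activate() below attaches to
 * any device with a nonzero tx_queue_len; from userspace its bands and
 * priomap can be inspected with, for example, `tc qdisc show dev eth0`
 * (device name illustrative).
 */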

struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev = dev;
	dev_hold(dev);
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(-err);
}

struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->stats_lock = &dev->queue_lock;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
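
/* Illustrative callers: dev_activate() below builds the default root qdisc
 * with parentid TC_H_ROOT, and classful qdiscs typically create their
 * default child queues the same way.  Note that the new qdisc is not added
 * to dev->qdisc_list here; callers that want it visible to dumps do that
 * themselves.
 */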

/* Under dev->queue_lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	kfree((char *) qdisc - qdisc->padded);
}

/* Under dev->queue_lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	list_del(&qdisc->list);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
#endif
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc->dev);
	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}

void dev_activate(struct net_device *dev)
{
	/* If no queueing discipline is attached to the device, create a
	   default one: pfifo_fast for devices that need queueing, and
	   noqueue_qdisc for virtual interfaces.
	 */

	if (dev->qdisc_sleeping == &noop_qdisc) {
		struct Qdisc *qdisc;
		if (dev->tx_queue_len) {
			qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
						  TC_H_ROOT);
			if (qdisc == NULL) {
				printk(KERN_INFO "%s: activation failed\n", dev->name);
				return;
			}
			list_add_tail(&qdisc->list, &dev->qdisc_list);
		} else {
			qdisc = &noqueue_qdisc;
		}
		dev->qdisc_sleeping = qdisc;
	}

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	spin_lock_bh(&dev->queue_lock);
	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
	if (dev->qdisc != &noqueue_qdisc) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
	spin_unlock_bh(&dev->queue_lock);
}

void dev_deactivate(struct net_device *dev)
{
	struct Qdisc *qdisc;

	spin_lock_bh(&dev->queue_lock);
	qdisc = dev->qdisc;
	dev->qdisc = &noop_qdisc;

	qdisc_reset(qdisc);

	spin_unlock_bh(&dev->queue_lock);

	dev_watchdog_down(dev);

	/* Wait for outstanding dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
		yield();

	if (dev->gso_skb) {
		kfree_skb(dev->gso_skb);
		dev->gso_skb = NULL;
	}
}
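
/* Teardown ordering above: the qdisc pointer is swapped to &noop_qdisc under
 * queue_lock so newly arriving packets are dropped, synchronize_rcu() then
 * waits out any dev_queue_xmit() still using the old pointer, and the busy
 * wait on __LINK_STATE_QDISC_RUNNING lets an in-flight __qdisc_run() finish
 * before the leftover gso_skb is freed.
 */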

void dev_init_scheduler(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	dev->qdisc = &noop_qdisc;
	dev->qdisc_sleeping = &noop_qdisc;
	INIT_LIST_HEAD(&dev->qdisc_list);
	qdisc_unlock_tree(dev);

	dev_watchdog_init(dev);
}

void dev_shutdown(struct net_device *dev)
{
	struct Qdisc *qdisc;

	qdisc_lock_tree(dev);
	qdisc = dev->qdisc_sleeping;
	dev->qdisc = &noop_qdisc;
	dev->qdisc_sleeping = &noop_qdisc;
	qdisc_destroy(qdisc);
#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
	if ((qdisc = dev->qdisc_ingress) != NULL) {
		dev->qdisc_ingress = NULL;
		qdisc_destroy(qdisc);
	}
#endif
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
	qdisc_unlock_tree(dev);
}

EXPORT_SYMBOL(netif_carrier_on);
EXPORT_SYMBOL(netif_carrier_off);
EXPORT_SYMBOL(noop_qdisc);
EXPORT_SYMBOL(qdisc_create_dflt);
EXPORT_SYMBOL(qdisc_destroy);
EXPORT_SYMBOL(qdisc_reset);
EXPORT_SYMBOL(qdisc_lock_tree);
EXPORT_SYMBOL(qdisc_unlock_tree);