blob: 52eb3439d7c6bf561dc88fca37fdaea0e8a1da4b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_generic.c Generic packet scheduler routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * Jamal Hadi Salim, <hadi@cyberus.ca> 990601
11 * - Ingress support
12 */
13
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/module.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/sched.h>
21#include <linux/string.h>
22#include <linux/mm.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/in.h>
26#include <linux/errno.h>
27#include <linux/interrupt.h>
28#include <linux/netdevice.h>
29#include <linux/skbuff.h>
30#include <linux/rtnetlink.h>
31#include <linux/init.h>
32#include <linux/rcupdate.h>
33#include <linux/list.h>
34#include <net/sock.h>
35#include <net/pkt_sched.h>
36
37/* Main transmission queue. */
38
/* Main qdisc structure lock.

   Modifications to data participating in scheduling must additionally
   be protected with the dev->queue_lock spinlock.

   The idea is the following:
   - enqueue and dequeue are serialized via the top level device
     spinlock dev->queue_lock.
   - tree walking is protected by read_lock(qdisc_tree_lock),
     and this lock is used only in process context.
   - updates to the tree are made only under the rtnl semaphore,
     hence this lock may be taken without local bh disabling.

   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
 */
DEFINE_RWLOCK(qdisc_tree_lock);
56
57void qdisc_lock_tree(struct net_device *dev)
58{
Patrick McHardy85670cc2006-09-27 16:45:45 -070059 write_lock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070060 spin_lock_bh(&dev->queue_lock);
61}
62
63void qdisc_unlock_tree(struct net_device *dev)
64{
65 spin_unlock_bh(&dev->queue_lock);
Patrick McHardy85670cc2006-09-27 16:45:45 -070066 write_unlock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070067}
68
/*
   dev->queue_lock serializes queue accesses for this device
   AND the dev->qdisc pointer itself.

   netif_tx_lock serializes accesses to the device driver.

   dev->queue_lock and netif_tx_lock are mutually exclusive:
   if one is grabbed, the other must be free.
 */
78
79
80/* Kick device.
81 Note, that this procedure can be called by a watchdog timer, so that
82 we do not check dev->tbusy flag here.
83
84 Returns: 0 - queue is empty.
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090085 >0 - queue is not empty, but throttled.
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 <0 - queue is not empty. Device is throttled, if dev->tbusy != 0.
87
88 NOTE: Called under dev->queue_lock with locally disabled BH.
89*/
90
Herbert Xu48d83322006-06-19 23:57:59 -070091static inline int qdisc_restart(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -070092{
93 struct Qdisc *q = dev->qdisc;
94 struct sk_buff *skb;
95
96 /* Dequeue packet */
Herbert Xuf6a78bf2006-06-22 02:57:17 -070097 if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070098 unsigned nolock = (dev->features & NETIF_F_LLTX);
Herbert Xuf6a78bf2006-06-22 02:57:17 -070099
100 dev->gso_skb = NULL;
101
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 /*
103 * When the driver has LLTX set it does its own locking
104 * in start_xmit. No need to add additional overhead by
105 * locking again. These checks are worth it because
106 * even uncongested locks can be quite expensive.
107 * The driver can do trylock like here too, in case
108 * of lock congestion it should return -1 and the packet
109 * will be requeued.
110 */
111 if (!nolock) {
Herbert Xu932ff272006-06-09 12:20:56 -0700112 if (!netif_tx_trylock(dev)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113 collision:
114 /* So, someone grabbed the driver. */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900115
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116 /* It may be transient configuration error,
117 when hard_start_xmit() recurses. We detect
118 it by checking xmit owner and drop the
119 packet when deadloop is detected.
120 */
121 if (dev->xmit_lock_owner == smp_processor_id()) {
122 kfree_skb(skb);
123 if (net_ratelimit())
124 printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
125 return -1;
126 }
127 __get_cpu_var(netdev_rx_stat).cpu_collision++;
128 goto requeue;
129 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900131
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 {
133 /* And release queue */
134 spin_unlock(&dev->queue_lock);
135
136 if (!netif_queue_stopped(dev)) {
137 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700139 ret = dev_hard_start_xmit(skb, dev);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900140 if (ret == NETDEV_TX_OK) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 if (!nolock) {
Herbert Xu932ff272006-06-09 12:20:56 -0700142 netif_tx_unlock(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 }
144 spin_lock(&dev->queue_lock);
145 return -1;
146 }
147 if (ret == NETDEV_TX_LOCKED && nolock) {
148 spin_lock(&dev->queue_lock);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900149 goto collision;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 }
151 }
152
153 /* NETDEV_TX_BUSY - we need to requeue */
154 /* Release the driver */
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900155 if (!nolock) {
Herbert Xu932ff272006-06-09 12:20:56 -0700156 netif_tx_unlock(dev);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900157 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 spin_lock(&dev->queue_lock);
159 q = dev->qdisc;
160 }
161
162 /* Device kicked us out :(
163 This is possible in three cases:
164
165 0. driver is locked
166 1. fastroute is enabled
167 2. device cannot determine busy state
168 before start of transmission (f.e. dialout)
169 3. device is buggy (ppp)
170 */
171
172requeue:
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700173 if (skb->next)
174 dev->gso_skb = skb;
175 else
176 q->ops->requeue(skb, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 netif_schedule(dev);
178 return 1;
179 }
Stephen Hemminger8cbe1d42005-05-03 16:24:03 -0700180 BUG_ON((int) q->q.qlen < 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 return q->q.qlen;
182}
183
Herbert Xu48d83322006-06-19 23:57:59 -0700184void __qdisc_run(struct net_device *dev)
185{
Herbert Xud4828d82006-06-22 02:28:18 -0700186 if (unlikely(dev->qdisc == &noop_qdisc))
187 goto out;
188
Herbert Xu48d83322006-06-19 23:57:59 -0700189 while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
190 /* NOTHING */;
191
Herbert Xud4828d82006-06-22 02:28:18 -0700192out:
Herbert Xu48d83322006-06-19 23:57:59 -0700193 clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
194}
195
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196static void dev_watchdog(unsigned long arg)
197{
198 struct net_device *dev = (struct net_device *)arg;
199
Herbert Xu932ff272006-06-09 12:20:56 -0700200 netif_tx_lock(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 if (dev->qdisc != &noop_qdisc) {
202 if (netif_device_present(dev) &&
203 netif_running(dev) &&
204 netif_carrier_ok(dev)) {
205 if (netif_queue_stopped(dev) &&
Stephen Hemminger338f7562006-05-16 15:02:12 -0700206 time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) {
207
208 printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n",
209 dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 dev->tx_timeout(dev);
211 }
Arjan van de Venf5a6e012007-02-05 17:59:51 -0800212 if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 dev_hold(dev);
214 }
215 }
Herbert Xu932ff272006-06-09 12:20:56 -0700216 netif_tx_unlock(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 dev_put(dev);
219}
220
221static void dev_watchdog_init(struct net_device *dev)
222{
223 init_timer(&dev->watchdog_timer);
224 dev->watchdog_timer.data = (unsigned long)dev;
225 dev->watchdog_timer.function = dev_watchdog;
226}
227
228void __netdev_watchdog_up(struct net_device *dev)
229{
230 if (dev->tx_timeout) {
231 if (dev->watchdog_timeo <= 0)
232 dev->watchdog_timeo = 5*HZ;
233 if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
234 dev_hold(dev);
235 }
236}
237
/* File-local wrapper: start the transmit watchdog of @dev. */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}
242
243static void dev_watchdog_down(struct net_device *dev)
244{
Herbert Xu932ff272006-06-09 12:20:56 -0700245 netif_tx_lock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 if (del_timer(&dev->watchdog_timer))
Stephen Hemminger15333062006-03-20 22:32:28 -0800247 dev_put(dev);
Herbert Xu932ff272006-06-09 12:20:56 -0700248 netif_tx_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249}
250
Denis Vlasenko0a242ef2005-08-11 15:32:53 -0700251void netif_carrier_on(struct net_device *dev)
252{
253 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
254 linkwatch_fire_event(dev);
255 if (netif_running(dev))
256 __netdev_watchdog_up(dev);
257}
258
259void netif_carrier_off(struct net_device *dev)
260{
261 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
262 linkwatch_fire_event(dev);
263}
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
266 under all circumstances. It is difficult to invent anything faster or
267 cheaper.
268 */
269
Thomas Graf94df1092005-06-18 22:59:08 -0700270static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271{
272 kfree_skb(skb);
273 return NET_XMIT_CN;
274}
275
Thomas Graf94df1092005-06-18 22:59:08 -0700276static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277{
278 return NULL;
279}
280
Thomas Graf94df1092005-06-18 22:59:08 -0700281static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282{
283 if (net_ratelimit())
Thomas Graf94df1092005-06-18 22:59:08 -0700284 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
285 skb->dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 kfree_skb(skb);
287 return NET_XMIT_CN;
288}
289
290struct Qdisc_ops noop_qdisc_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 .id = "noop",
292 .priv_size = 0,
293 .enqueue = noop_enqueue,
294 .dequeue = noop_dequeue,
295 .requeue = noop_requeue,
296 .owner = THIS_MODULE,
297};
298
299struct Qdisc noop_qdisc = {
300 .enqueue = noop_enqueue,
301 .dequeue = noop_dequeue,
302 .flags = TCQ_F_BUILTIN,
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900303 .ops = &noop_qdisc_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 .list = LIST_HEAD_INIT(noop_qdisc.list),
305};
306
307static struct Qdisc_ops noqueue_qdisc_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 .id = "noqueue",
309 .priv_size = 0,
310 .enqueue = noop_enqueue,
311 .dequeue = noop_dequeue,
312 .requeue = noop_requeue,
313 .owner = THIS_MODULE,
314};
315
316static struct Qdisc noqueue_qdisc = {
317 .enqueue = NULL,
318 .dequeue = noop_dequeue,
319 .flags = TCQ_F_BUILTIN,
320 .ops = &noqueue_qdisc_ops,
321 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
322};
323
324
325static const u8 prio2band[TC_PRIO_MAX+1] =
326 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
327
328/* 3-band FIFO queue: old style, but should be a bit faster than
329 generic prio+fifo combination.
330 */
331
Thomas Graff87a9c32005-06-18 22:58:53 -0700332#define PFIFO_FAST_BANDS 3
333
Thomas Graf321090e2005-06-18 22:58:35 -0700334static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
335 struct Qdisc *qdisc)
336{
337 struct sk_buff_head *list = qdisc_priv(qdisc);
338 return list + prio2band[skb->priority & TC_PRIO_MAX];
339}
340
Thomas Graff87a9c32005-06-18 22:58:53 -0700341static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342{
Thomas Graf321090e2005-06-18 22:58:35 -0700343 struct sk_buff_head *list = prio2list(skb, qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344
Thomas Graf821d24a2005-06-18 22:58:15 -0700345 if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 qdisc->q.qlen++;
Thomas Graf821d24a2005-06-18 22:58:15 -0700347 return __qdisc_enqueue_tail(skb, qdisc, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 }
Thomas Graf821d24a2005-06-18 22:58:15 -0700349
350 return qdisc_drop(skb, qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351}
352
Thomas Graff87a9c32005-06-18 22:58:53 -0700353static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354{
355 int prio;
356 struct sk_buff_head *list = qdisc_priv(qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
Thomas Graf452f2992005-07-18 13:30:53 -0700358 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
359 if (!skb_queue_empty(list + prio)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 qdisc->q.qlen--;
Thomas Graf452f2992005-07-18 13:30:53 -0700361 return __qdisc_dequeue_head(qdisc, list + prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 }
363 }
Thomas Graff87a9c32005-06-18 22:58:53 -0700364
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 return NULL;
366}
367
Thomas Graff87a9c32005-06-18 22:58:53 -0700368static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 qdisc->q.qlen++;
Thomas Graf321090e2005-06-18 22:58:35 -0700371 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372}
373
Thomas Graff87a9c32005-06-18 22:58:53 -0700374static void pfifo_fast_reset(struct Qdisc* qdisc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375{
376 int prio;
377 struct sk_buff_head *list = qdisc_priv(qdisc);
378
Thomas Graff87a9c32005-06-18 22:58:53 -0700379 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
Thomas Graf821d24a2005-06-18 22:58:15 -0700380 __qdisc_reset_queue(qdisc, list + prio);
381
382 qdisc->qstats.backlog = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 qdisc->q.qlen = 0;
384}
385
386static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
387{
Thomas Graff87a9c32005-06-18 22:58:53 -0700388 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
391 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
392 return skb->len;
393
394rtattr_failure:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return -1;
396}
397
398static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
399{
Thomas Graff87a9c32005-06-18 22:58:53 -0700400 int prio;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 struct sk_buff_head *list = qdisc_priv(qdisc);
402
Thomas Graff87a9c32005-06-18 22:58:53 -0700403 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
404 skb_queue_head_init(list + prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405
406 return 0;
407}
408
409static struct Qdisc_ops pfifo_fast_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 .id = "pfifo_fast",
Thomas Graff87a9c32005-06-18 22:58:53 -0700411 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 .enqueue = pfifo_fast_enqueue,
413 .dequeue = pfifo_fast_dequeue,
414 .requeue = pfifo_fast_requeue,
415 .init = pfifo_fast_init,
416 .reset = pfifo_fast_reset,
417 .dump = pfifo_fast_dump,
418 .owner = THIS_MODULE,
419};
420
Thomas Graf3d54b822005-07-05 14:15:09 -0700421struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422{
423 void *p;
424 struct Qdisc *sch;
Thomas Graf3d54b822005-07-05 14:15:09 -0700425 unsigned int size;
426 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
428 /* ensure that the Qdisc and the private data are 32-byte aligned */
Thomas Graf3d54b822005-07-05 14:15:09 -0700429 size = QDISC_ALIGN(sizeof(*sch));
430 size += ops->priv_size + (QDISC_ALIGNTO - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700432 p = kzalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 if (!p)
Thomas Graf3d54b822005-07-05 14:15:09 -0700434 goto errout;
Thomas Graf3d54b822005-07-05 14:15:09 -0700435 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
436 sch->padded = (char *) sch - (char *) p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
438 INIT_LIST_HEAD(&sch->list);
439 skb_queue_head_init(&sch->q);
440 sch->ops = ops;
441 sch->enqueue = ops->enqueue;
442 sch->dequeue = ops->dequeue;
443 sch->dev = dev;
444 dev_hold(dev);
445 sch->stats_lock = &dev->queue_lock;
446 atomic_set(&sch->refcnt, 1);
Thomas Graf3d54b822005-07-05 14:15:09 -0700447
448 return sch;
449errout:
450 return ERR_PTR(-err);
451}
452
Patrick McHardy9f9afec2006-11-29 17:35:18 -0800453struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
454 unsigned int parentid)
Thomas Graf3d54b822005-07-05 14:15:09 -0700455{
456 struct Qdisc *sch;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900457
Thomas Graf3d54b822005-07-05 14:15:09 -0700458 sch = qdisc_alloc(dev, ops);
459 if (IS_ERR(sch))
460 goto errout;
Patrick McHardy9f9afec2006-11-29 17:35:18 -0800461 sch->parent = parentid;
Thomas Graf3d54b822005-07-05 14:15:09 -0700462
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 if (!ops->init || ops->init(sch, NULL) == 0)
464 return sch;
465
Thomas Graf0fbbeb12005-08-23 10:12:44 -0700466 qdisc_destroy(sch);
Thomas Graf3d54b822005-07-05 14:15:09 -0700467errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 return NULL;
469}
470
471/* Under dev->queue_lock and BH! */
472
473void qdisc_reset(struct Qdisc *qdisc)
474{
475 struct Qdisc_ops *ops = qdisc->ops;
476
477 if (ops->reset)
478 ops->reset(qdisc);
479}
480
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900481/* this is the rcu callback function to clean up a qdisc when there
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 * are no further references to it */
483
484static void __qdisc_destroy(struct rcu_head *head)
485{
486 struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 kfree((char *) qdisc - qdisc->padded);
488}
489
490/* Under dev->queue_lock and BH! */
491
492void qdisc_destroy(struct Qdisc *qdisc)
493{
Patrick McHardy85670cc2006-09-27 16:45:45 -0700494 struct Qdisc_ops *ops = qdisc->ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
496 if (qdisc->flags & TCQ_F_BUILTIN ||
Patrick McHardy85670cc2006-09-27 16:45:45 -0700497 !atomic_dec_and_test(&qdisc->refcnt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 return;
499
Patrick McHardy85670cc2006-09-27 16:45:45 -0700500 list_del(&qdisc->list);
501#ifdef CONFIG_NET_ESTIMATOR
502 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
503#endif
504 if (ops->reset)
505 ops->reset(qdisc);
506 if (ops->destroy)
507 ops->destroy(qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508
Patrick McHardy85670cc2006-09-27 16:45:45 -0700509 module_put(ops->owner);
510 dev_put(qdisc->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 call_rcu(&qdisc->q_rcu, __qdisc_destroy);
512}
513
514void dev_activate(struct net_device *dev)
515{
516 /* No queueing discipline is attached to device;
517 create default one i.e. pfifo_fast for devices,
518 which need queueing and noqueue_qdisc for
519 virtual interfaces
520 */
521
522 if (dev->qdisc_sleeping == &noop_qdisc) {
523 struct Qdisc *qdisc;
524 if (dev->tx_queue_len) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -0800525 qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
526 TC_H_ROOT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 if (qdisc == NULL) {
528 printk(KERN_INFO "%s: activation failed\n", dev->name);
529 return;
530 }
Patrick McHardy85670cc2006-09-27 16:45:45 -0700531 write_lock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 list_add_tail(&qdisc->list, &dev->qdisc_list);
Patrick McHardy85670cc2006-09-27 16:45:45 -0700533 write_unlock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 } else {
535 qdisc = &noqueue_qdisc;
536 }
Patrick McHardy85670cc2006-09-27 16:45:45 -0700537 write_lock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 dev->qdisc_sleeping = qdisc;
Patrick McHardy85670cc2006-09-27 16:45:45 -0700539 write_unlock(&qdisc_tree_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 }
541
Tommy S. Christensencacaddf2005-05-03 16:18:52 -0700542 if (!netif_carrier_ok(dev))
543 /* Delay activation until next carrier-on event */
544 return;
545
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 spin_lock_bh(&dev->queue_lock);
547 rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
548 if (dev->qdisc != &noqueue_qdisc) {
549 dev->trans_start = jiffies;
550 dev_watchdog_up(dev);
551 }
552 spin_unlock_bh(&dev->queue_lock);
553}
554
555void dev_deactivate(struct net_device *dev)
556{
557 struct Qdisc *qdisc;
558
559 spin_lock_bh(&dev->queue_lock);
560 qdisc = dev->qdisc;
561 dev->qdisc = &noop_qdisc;
562
563 qdisc_reset(qdisc);
564
565 spin_unlock_bh(&dev->queue_lock);
566
567 dev_watchdog_down(dev);
568
Herbert Xud4828d82006-06-22 02:28:18 -0700569 /* Wait for outstanding dev_queue_xmit calls. */
570 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
Herbert Xud4828d82006-06-22 02:28:18 -0700572 /* Wait for outstanding qdisc_run calls. */
573 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
574 yield();
Herbert Xuf6a78bf2006-06-22 02:57:17 -0700575
576 if (dev->gso_skb) {
577 kfree_skb(dev->gso_skb);
578 dev->gso_skb = NULL;
579 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580}
581
582void dev_init_scheduler(struct net_device *dev)
583{
584 qdisc_lock_tree(dev);
585 dev->qdisc = &noop_qdisc;
586 dev->qdisc_sleeping = &noop_qdisc;
587 INIT_LIST_HEAD(&dev->qdisc_list);
588 qdisc_unlock_tree(dev);
589
590 dev_watchdog_init(dev);
591}
592
593void dev_shutdown(struct net_device *dev)
594{
595 struct Qdisc *qdisc;
596
597 qdisc_lock_tree(dev);
598 qdisc = dev->qdisc_sleeping;
599 dev->qdisc = &noop_qdisc;
600 dev->qdisc_sleeping = &noop_qdisc;
601 qdisc_destroy(qdisc);
602#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900603 if ((qdisc = dev->qdisc_ingress) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 dev->qdisc_ingress = NULL;
605 qdisc_destroy(qdisc);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900606 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607#endif
608 BUG_TRAP(!timer_pending(&dev->watchdog_timer));
609 qdisc_unlock_tree(dev);
610}
611
/* Carrier state helpers. */
EXPORT_SYMBOL(netif_carrier_off);
EXPORT_SYMBOL(netif_carrier_on);

/* Qdisc objects and lifetime management. */
EXPORT_SYMBOL(noop_qdisc);
EXPORT_SYMBOL(qdisc_create_dflt);
EXPORT_SYMBOL(qdisc_destroy);
EXPORT_SYMBOL(qdisc_reset);

/* Qdisc tree locking. */
EXPORT_SYMBOL(qdisc_lock_tree);
EXPORT_SYMBOL(qdisc_unlock_tree);