blob: 8ac05981be202ab5090c2c466e8855bcf7ff83e4 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License
5 * as published by the Free Software Foundation; either version
6 * 2 of the License, or (at your option) any later version.
7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 */
10
11#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include <linux/types.h>
13#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/errno.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020016#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/netdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/skbuff.h>
20#include <linux/moduleparam.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070021#include <net/dst.h>
22#include <net/neighbour.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <net/pkt_sched.h>
24
25/*
26 How to setup it.
27 ----------------
28
29 After loading this module you will find a new device teqlN
30 and new qdisc with the same name. To join a slave to the equalizer
31 you should just set this qdisc on a device f.e.
32
33 # tc qdisc add dev eth0 root teql0
34 # tc qdisc add dev eth1 root teql0
35
36 That's all. Full PnP 8)
37
38 Applicability.
39 --------------
40
41 1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42 signal and generate EOI events. If you want to equalize virtual devices
43 like tunnels, use a normal eql device.
44 2. This device puts no limitations on physical slave characteristics
45 f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
46 Certainly, large difference in link speeds will make the resulting
   equalized link unusable, because of huge packet reordering.
48 I estimate an upper useful difference as ~10 times.
49 3. If the slave requires address resolution, only protocols using
50 neighbour cache (IPv4/IPv6) will work over the equalized link.
51 Other protocols are still allowed to use the slave device directly,
52 which will not break load balancing, though native slave
53 traffic will have the highest priority. */
54
/* Per-master state, kept in the netdev private area of each teqlN device.
 * A master is simultaneously a network device and the owner of a Qdisc_ops
 * whose id matches the device name (see teql_master_setup/teql_init). */
struct teql_master
{
	struct Qdisc_ops qops;		/* qdisc ops registered under this device's name */
	struct net_device *dev;		/* the teqlN master device itself */
	struct Qdisc *slaves;		/* circular list of slave qdiscs; also the
					 * round-robin cursor. NULL when no slaves. */
	struct list_head master_list;	/* linkage on master_dev_list (module teardown) */
	struct net_device_stats stats;	/* tx statistics reported via get_stats */
};
63
/* Private data of one slave teql qdisc (one per enslaved device). */
struct teql_sched_data
{
	struct Qdisc *next;		/* next slave in the master's circular list */
	struct teql_master *m;		/* owning master */
	struct neighbour *ncache;	/* cached neighbour from the last resolve;
					 * swapped with xchg(), released via
					 * teql_neigh_release() */
	struct sk_buff_head q;		/* packets queued on this slave */
};
71
/* Follow the circular slave list: the link lives in the slave's priv data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

/* Link-level flags the master derives from the intersection of its slaves. */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76/* "teql*" qdisc routines */
77
78static int
79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80{
David S. Miller5ce2d482008-07-08 17:06:30 -070081 struct net_device *dev = qdisc_dev(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 struct teql_sched_data *q = qdisc_priv(sch);
83
Krishna Kumar4cd8c9e2007-05-08 18:57:50 -070084 if (q->q.qlen < dev->tx_queue_len) {
85 __skb_queue_tail(&q->q, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 sch->bstats.bytes += skb->len;
87 sch->bstats.packets++;
88 return 0;
89 }
90
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 kfree_skb(skb);
92 sch->qstats.drops++;
93 return NET_XMIT_DROP;
94}
95
96static int
97teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98{
99 struct teql_sched_data *q = qdisc_priv(sch);
100
101 __skb_queue_head(&q->q, skb);
102 sch->qstats.requeues++;
103 return 0;
104}
105
/*
 * ->dequeue handler: hand the next queued skb to the slave device.
 * When our queue is empty, make this slave the round-robin cursor and
 * wake the master device so it feeds us again.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = &dat->m->dev->tx_queue;
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			/* Empty: serve this slave first on the next pass and
			 * restart the master's queue to refill us. */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	/* Report our backlog plus the master qdisc's, so the stack still
	 * sees pending work held at the master. */
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}
125
126static __inline__ void
127teql_neigh_release(struct neighbour *n)
128{
129 if (n)
130 neigh_release(n);
131}
132
/*
 * ->reset handler: discard everything queued on this slave and drop the
 * cached neighbour reference.
 */
static void
teql_reset(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	/* Atomically detach the cached neighbour, then release it —
	 * the xchg+release idiom used throughout this file. */
	teql_neigh_release(xchg(&dat->ncache, NULL));
}
142
/*
 * ->destroy handler: unlink this slave from the master's circular list,
 * fixing up the round-robin cursor, and free its queued packets and
 * neighbour cache.  When the last slave goes away, reset the master's
 * own qdisc under the tx queue lock.
 */
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Unlink q by pointing its predecessor past it. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					/* q was the cursor: advance it. */
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						/* Still q after advancing: q was the
						 * only slave, so the list is now empty. */
						master->slaves = NULL;
						spin_lock_bh(&master->dev->tx_queue.lock);
						qdisc_reset(master->dev->tx_queue.qdisc);
						spin_unlock_bh(&master->dev->tx_queue.lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
172
/*
 * ->init handler: attach this qdisc (and thus its device) as a slave of
 * the master identified by sch->ops (the ops struct is embedded in
 * struct teql_master).  Validates link-layer compatibility, then either
 * joins the existing circular slave list or starts a new one, folding
 * the slave's flags and MTU into the master.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	/* The master must have room for this slave's link-layer header. */
	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	/* Enslaving the master to itself would loop packets forever. */
	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is up: a new slave may not weaken any
			 * property the master already advertises. */
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: shrink it to the intersection of
			 * slave capabilities and the minimum slave MTU. */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert after the current head of the circular list. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: one-element circular list; the master
		 * inherits its MTU and link flags directly. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
216
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
/*
 * Build the link-layer header for skb on slave device dev using the
 * neighbour cache of the slave's qdisc.
 *
 * Returns 0 when the header was written, 1 when skb_res was handed to
 * the neighbour layer pending address resolution, -EAGAIN when
 * resolution is pending and no skb_res was supplied, or a negative
 * errno on lookup/header failure.
 */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->tx_queue.qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	/* Reuse the cached neighbour if it refers to the same address in
	 * the same table; otherwise look it up (creating if needed) on
	 * this slave device.  Either way we hold a reference on n. */
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		/* Neighbour is valid: copy its hardware address into the
		 * skb's link-layer header under the neighbour's lock. */
		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		/* Cache n for next time; release the old cached entry. */
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}
253
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700254static inline int teql_resolve(struct sk_buff *skb,
255 struct sk_buff *skb_res, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256{
David S. Millerb0e1e642008-07-08 17:42:10 -0700257 if (dev->tx_queue.qdisc == &noop_qdisc)
Evgeniy Polyakov4f9f8312007-11-06 03:08:09 -0800258 return -ENODEV;
259
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700260 if (dev->header_ops == NULL ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 skb->dst == NULL ||
262 skb->dst->neighbour == NULL)
263 return 0;
264 return __teql_resolve(skb, skb_res, dev);
265}
266
/*
 * hard_start_xmit for the master device: walk the circular slave list
 * starting at the round-robin cursor (master->slaves) and transmit skb
 * on the first usable slave.
 *
 * Returns 0 when the packet was consumed (transmitted, queued for
 * neighbour resolution, or dropped) and 1 to make the stack requeue it
 * when every candidate slave was busy.
 */
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;	/* cache: skb may be gone after a successful xmit */
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);

		/* Skip slaves whose device no longer runs this qdisc. */
		if (slave->tx_queue.qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) ||
		    __netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			/* Header is ready: try to grab the slave's tx lock
			 * and transmit directly, re-checking queue state
			 * under the lock. */
			if (netif_tx_trylock(slave)) {
				if (!netif_queue_stopped(slave) &&
				    !__netif_subqueue_stopped(slave, subq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					/* Advance the round-robin cursor. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* skb was consumed by the neighbour layer while
			 * resolution is in progress (second pass only). */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			/* Resolution pending and not yet allowed to hand
			 * the skb over: remember to retry. */
			nores = 1;
			break;
		}
		/* Strip any link-layer header a resolve may have built
		 * before offering the skb to the next slave. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Second pass: let teql_resolve() consume skb itself as
		 * the resolution trigger. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		/* All usable slaves are congested: stop the master and
		 * ask the stack to requeue the packet. */
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}
342
/*
 * dev->open for the master: refuse to come up without slaves, then
 * derive the master's MTU (minimum over all slaves) and link flags
 * (intersection over all slaves) and start the tx queue.
 */
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc * q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;	/* upper bound; shrunk to the smallest slave MTU */
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}
384
/* dev->stop for the master: just stop its tx queue; slaves are untouched. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
390
/* dev->get_stats for the master: stats live in the master private area. */
static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}
396
397static int teql_master_mtu(struct net_device *dev, int new_mtu)
398{
Patrick McHardy2941a482006-01-08 22:05:26 -0800399 struct teql_master *m = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 struct Qdisc *q;
401
402 if (new_mtu < 68)
403 return -EINVAL;
404
405 q = m->slaves;
406 if (q) {
407 do {
David S. Miller5ce2d482008-07-08 17:06:30 -0700408 if (new_mtu > qdisc_dev(q)->mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 return -EINVAL;
410 } while ((q=NEXT_SLAVE(q)) != m->slaves);
411 }
412
413 dev->mtu = new_mtu;
414 return 0;
415}
416
417static __init void teql_master_setup(struct net_device *dev)
418{
Patrick McHardy2941a482006-01-08 22:05:26 -0800419 struct teql_master *master = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 struct Qdisc_ops *ops = &master->qops;
421
422 master->dev = dev;
423 ops->priv_size = sizeof(struct teql_sched_data);
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900424
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 ops->enqueue = teql_enqueue;
426 ops->dequeue = teql_dequeue;
427 ops->requeue = teql_requeue;
428 ops->init = teql_qdisc_init;
429 ops->reset = teql_reset;
430 ops->destroy = teql_destroy;
431 ops->owner = THIS_MODULE;
432
433 dev->open = teql_master_open;
434 dev->hard_start_xmit = teql_master_xmit;
435 dev->stop = teql_master_close;
436 dev->get_stats = teql_master_stats;
437 dev->change_mtu = teql_master_mtu;
438 dev->type = ARPHRD_VOID;
439 dev->mtu = 1500;
440 dev->tx_queue_len = 100;
441 dev->flags = IFF_NOARP;
442 dev->hard_header_len = LL_MAX_HEADER;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443}
444
/* All successfully registered masters, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN master devices created at module load (default 1). */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
449
/*
 * Module load: create max_equalizers teqlN master devices, registering
 * each as both a netdevice and a qdisc.  On a mid-loop failure, undo
 * that device's partial registration and stop; success is reported if
 * at least one master came up.
 */
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* The qdisc id mirrors the device name (e.g. "teql0"). */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			/* Unwind in reverse order of registration. */
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* i > 0 means at least one master exists: treat load as success. */
	return i ? 0 : err;
}
486
/* Module unload: tear down every master created in teql_init(). */
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		/* Reverse of teql_init(): qdisc ops first, then the
		 * device that embeds them. */
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}
500
501module_init(teql_init);
502module_exit(teql_exit);
503
504MODULE_LICENSE("GPL");