/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>

struct sock *mroute6_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct mif_device vif6_table[MAXMIFS];	/* Devices		*/
static int maxvif;

#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
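
/*
 * vif6_table is updated by mif6_add()/mif6_delete() under
 * write_lock_bh(&mrt_lock), so a MIF_EXISTS() check is only stable
 * while mrt_lock is held (readers take read_lock(&mrt_lock)).
 */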

static int mroute_do_assert;				/* Set in PIM assert	*/
#ifdef CONFIG_IPV6_PIMSM_V2
static int mroute_do_pim;
#else
#define mroute_do_pim 0
#endif

static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/

static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IPV6_PIMSM_V2
static struct inet6_protocol pim6_protocol;
#endif

static struct timer_list ipmr_expire_timer;


#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct mfc6_cache **cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc6_cache *mfc;

	it->cache = mfc6_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		if (pos-- == 0)
			return &vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	read_lock(&mrt_lock);
	return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(iter, 0);

	while (++iter->ct < maxvif) {
		if (!MIF_EXISTS(iter->ct))
			continue;
		return &vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip6mr_vif_seq_ops,
				sizeof(struct ipmr_vif_iter));
}

static struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	/* seq_open_private() allocated the iterator, so the private
	 * variant must free it on release */
	.release = seq_release_private,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
		: SEQ_START_TOKEN);
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != mfc6_cache_array);

	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
		mfc = mfc6_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq,
			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
			   mfc->mf6c_parent,
			   mfc->mfc_un.res.pkt,
			   mfc->mfc_un.res.bytes,
			   mfc->mfc_un.res.wrong_if);

		if (it->cache != &mfc_unres_queue) {
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ipmr_mfc_seq_ops,
				sizeof(struct ipmr_mfc_iter));
}

static struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	/* pairs with seq_open_private() above */
	.release = seq_release_private,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2
static int reg_vif_num = -1;

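/*
 * PIM Register messages (RFC 4601, section 4.9.3) arrive as IPv6
 * protocol IPPROTO_PIM: an 8-byte PIM header of type PIM_REGISTER
 * immediately followed by the encapsulated IPv6 multicast packet.
 * pim6_rcv() validates the header and checksum, strips the
 * encapsulation and feeds the inner packet back through the
 * pim6reg pseudo-device.
 */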
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Must be a PIMv2 Register without the Null-Register bit set,
	 * with a checksum valid over either the header alone or the
	 * whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IPV6);	/* decapsulated packet is IPv6, not IPv4 */
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
	((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats *)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
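	/* 1500-byte Ethernet MTU less the outer IPv6 header (40) and
	 * the 8-byte PIM Register header the encapsulation adds */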
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->destructor		= free_netdev;
}

static struct net_device *ip6mr_reg_vif(void)
{
	struct net_device *dev;
	struct inet6_dev *in_dev;

	dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
			   reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	in_dev = ipv6_find_idev(dev);
	if (!in_dev)
		goto failure;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(int vifi)
{
	struct mif_device *v;
	struct net_device *dev;
	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi + 1 == maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mfc6_cache *c)
{
	struct sk_buff *skb;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */
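/*
 * The timer handler runs in softirq context while user-space requests
 * take mfc_unres_lock from process context, so ipmr_expire_process()
 * only trylocks; on contention it re-arms the timer one jiffy out
 * instead of spinning in the softirq.
 */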

static void ipmr_do_expire_process(unsigned long dummy)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, **cp;

	cp = &mfc_unres_queue;

	while ((c = *cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;
		ip6mr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long dummy)
{
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len))
		ipmr_do_expire_process(dummy);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < maxvif; vifi++) {
		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
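
/*
 * A ttl of 255 marks a mif as "do not forward"; anything lower is the
 * minimum hop limit a packet needs before it is copied out that mif.
 * For example, ttls = {1, 255, 3} forwards on mifs 0 and 2 only, and
 * ip6_mr_forward() skips mif 2 for packets with hop_limit <= 3.
 * minvif/maxvif bound the range that forwarding has to scan.
 */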

static int mif6_add(struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &vif6_table[vifi];
	struct net_device *dev;

	/* Is vif busy ? */
	if (MIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case 0:
		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	dev_set_allmulti(dev, 1);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi + 1 > maxvif)
		maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	for (c = mfc6_cache_array[line]; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			break;
	}
	return c;
}
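
/*
 * ip6mr_cache_find() hashes (mcastgrp, origin) via MFC6_HASH into one
 * of the MFC6_LINES chains of mfc6_cache_array; every caller in this
 * file already holds mrt_lock for reading, which is what keeps the
 * chain stable during the walk.
 */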

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
			}
			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip6_mr_forward(skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
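
/*
 * What gets queued on mroute6_socket is a struct mrt6msg:
 *
 *	im6_mbz		must be zero
 *	im6_msgtype	MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT
 *	im6_mif		the arrival (or register) mif index
 *	im6_src/im6_dst	copied from the triggering packet
 *
 * For MRT6MSG_WHOLEPKT the original packet follows the header so the
 * daemon can build the PIM Register encapsulation itself.
 */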

static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->dst = dst_clone(pkt->dst);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
{
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		ipmr_do_expire_process(1);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mf6cctl *mfc)
{
	int line;
	struct mfc6_cache *c, **cp;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct mif_device *v;
	int ct;

	if (dev_net(dev) != &init_net)
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	v = &vif6_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == dev)
			mif6_delete(ct);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

void __init ip6_mr_init(void)
{
	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		panic("cannot allocate ip6_mrt_cache");

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	register_netdevice_notifier(&ip6_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
	proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
#endif
}


static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
	int line;
	struct mfc6_cache *uc, *c, **cp;
	unsigned char ttls[MAXMIFS];
	int i;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc6_cache_array[line];
	mfc6_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ip6mr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
		struct mfc6_cache *c, **cp;

		cp = &mfc6_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags & MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc6_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mroute6_socket == NULL))
		mroute6_socket = sk;
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = 0;

	rtnl_lock();
	if (sk == mroute6_socket) {
		write_lock_bh(&mrt_lock);
		mroute6_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	} else
		err = -EACCES;
	rtnl_unlock();

	return err;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
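
/*
 * A pim6sd-style daemon typically drives this interface roughly as
 * follows (sketch only, error handling omitted):
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mif = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *	struct mf6cctl mfc = { ... };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 *
 * and then reads MRT6MSG_* notifications back from the same raw socket.
 */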

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
	int ret;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;

	if (optname != MRT6_INIT) {
		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(&vif, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mifi);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC)
			ret = ip6mr_mfc_delete(&mfc);
		else
			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (enabling PIM also enables asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mroute_do_assert = !!v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
			if (mroute_do_pim)
				ret = inet6_add_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			else
				ret = inet6_del_protocol(&pim6_protocol,
							 IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
		}
		rtnl_unlock();
		return ret;
	}

#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &vif6_table[vr.mifi];
		if (MIF_EXISTS(vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	/* XXX stats */
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi fl;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
		((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
		kfree_skb(skb);
		return 0;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl = (struct flowi) {
		.oif = vif->link,
		.nl_u = { .ip6_u =
			  { .daddr = ipv6h->daddr, }
		}
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		goto out_free;

	dst_release(skb->dst);
	skb->dst = dst;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program has joined on.
	 * If we did not do this, the program would have to join on all
	 * interfaces. On the other hand, a multihoming host (or a router,
	 * but not an mrouter) cannot join on more than one interface - that
	 * would result in receiving multiple copies of each packet.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = maxvif - 1; ct >= 0; ct--) {
		if (vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}

static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif6_table[vif].dev != skb->dev) {
		int true_vifi;

		cache->mfc_un.res.wrong_if++;
		true_vifi = ip6mr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

	vif6_table[vif].pkt_in++;
	vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame: clone the skb for every target mif
	 *	except the last one, which consumes the original.
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		ip6mr_forward2(skb, cache, psend);
		return 0;
	}

dont_forward:
	kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}


static int
ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
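
/*
 * The netlink encoding above mirrors IPv4 ipmr: the incoming interface
 * goes in RTA_IIF and each outgoing mif becomes a struct rtnexthop
 * inside RTA_MULTIPATH, with rtnh_hops (ab)used to carry the mif's
 * TTL threshold.
 */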

int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);

		err = ip6mr_cache_unresolved(vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = ip6mr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}