blob: e4f809a93f4701efbb3f68eec77e00593ad01f7c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13 *
14 * Fixes:
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
24 * overflow.
25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
27 * Relax this requirement to work with older peers.
28 *
29 */
30
31#include <linux/config.h>
32#include <asm/system.h>
33#include <asm/uaccess.h>
34#include <linux/types.h>
35#include <linux/sched.h>
36#include <linux/errno.h>
37#include <linux/timer.h>
38#include <linux/mm.h>
39#include <linux/kernel.h>
40#include <linux/fcntl.h>
41#include <linux/stat.h>
42#include <linux/socket.h>
43#include <linux/in.h>
44#include <linux/inet.h>
45#include <linux/netdevice.h>
46#include <linux/inetdevice.h>
47#include <linux/igmp.h>
48#include <linux/proc_fs.h>
49#include <linux/seq_file.h>
50#include <linux/mroute.h>
51#include <linux/init.h>
52#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
55#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
64
/* CONFIG_IP_PIMSM is the common switch: defined when either PIM-SM version is configured. */
65#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
66#define CONFIG_IP_PIMSM 1
67#endif
68
/* Socket that issued MRT_INIT; reset to NULL by mrtsock_destruct(). */
69static struct sock *mroute_socket;
70
71
72/* Big lock, protecting vif table, mrt cache and mroute socket state.
73 Note that the changes are semaphored via rtnl_lock.
74 */
75
76static DEFINE_RWLOCK(mrt_lock);
77
78/*
79 * Multicast router control variables
80 */
81
82static struct vif_device vif_table[MAXVIFS]; /* Devices */
/* One more than the highest vif index in use; maintained by vif_add()/vif_delete(). */
83static int maxvif;
84
/* A vif slot is considered in use exactly when it has a device attached. */
85#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
86
87static int mroute_do_assert; /* Set in PIM assert */
/* Non-zero once PIM mode has been switched on through the MRT_PIM socket option. */
88static int mroute_do_pim;
89
90static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
91
92static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
93static atomic_t cache_resolve_queue_len; /* Size of unresolved */
94
95/* Special spinlock for queue of unresolved entries */
96static DEFINE_SPINLOCK(mfc_unres_lock);
97
98/* We return to original Alan's scheme. Hash table of resolved
99 entries is changed only in process context and protected
100 with weak lock mrt_lock. Queue of unresolved entries is protected
101 with strong spinlock mfc_unres_lock.
102
103 In this case data path is free of exclusive locks at all.
104 */
105
/* Slab cache from which every struct mfc_cache (resolved or unresolved) is allocated. */
106static kmem_cache_t *mrt_cachep;
107
/* Forward declarations for routines used before their definition. */
108static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
109static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
110static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
111
112#ifdef CONFIG_IP_PIMSM_V2
113static struct net_protocol pim_protocol;
114#endif
115
/* Timer for the unresolved queue; (re)armed by ipmr_cache_unresolved() and ipmr_expire_process(). */
116static struct timer_list ipmr_expire_timer;
117
118/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
119
120static
121struct net_device *ipmr_new_tunnel(struct vifctl *v)
122{
123 struct net_device *dev;
124
125 dev = __dev_get_by_name("tunl0");
126
127 if (dev) {
128 int err;
129 struct ifreq ifr;
130 mm_segment_t oldfs;
131 struct ip_tunnel_parm p;
132 struct in_device *in_dev;
133
134 memset(&p, 0, sizeof(p));
135 p.iph.daddr = v->vifc_rmt_addr.s_addr;
136 p.iph.saddr = v->vifc_lcl_addr.s_addr;
137 p.iph.version = 4;
138 p.iph.ihl = 5;
139 p.iph.protocol = IPPROTO_IPIP;
140 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
141 ifr.ifr_ifru.ifru_data = (void*)&p;
142
143 oldfs = get_fs(); set_fs(KERNEL_DS);
144 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
145 set_fs(oldfs);
146
147 dev = NULL;
148
149 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
150 dev->flags |= IFF_MULTICAST;
151
152 in_dev = __in_dev_get(dev);
153 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
154 goto failure;
155 in_dev->cnf.rp_filter = 0;
156
157 if (dev_open(dev))
158 goto failure;
159 }
160 }
161 return dev;
162
163failure:
164 /* allow the register to be completed before unregistering. */
165 rtnl_unlock();
166 rtnl_lock();
167
168 unregister_netdevice(dev);
169 return NULL;
170}
171
172#ifdef CONFIG_IP_PIMSM
173
/* Index of the PIM register vif in vif_table, or -1 while none is installed
 * (set by vif_add() for VIFF_REGISTER, reset by vif_delete()). */
174static int reg_vif_num = -1;
175
176static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
177{
178 read_lock(&mrt_lock);
179 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
180 ((struct net_device_stats*)dev->priv)->tx_packets++;
181 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
182 read_unlock(&mrt_lock);
183 kfree_skb(skb);
184 return 0;
185}
186
187static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
188{
189 return (struct net_device_stats*)dev->priv;
190}
191
192static void reg_vif_setup(struct net_device *dev)
193{
194 dev->type = ARPHRD_PIMREG;
195 dev->mtu = 1500 - sizeof(struct iphdr) - 8;
196 dev->flags = IFF_NOARP;
197 dev->hard_start_xmit = reg_vif_xmit;
198 dev->get_stats = reg_vif_get_stats;
199 dev->destructor = free_netdev;
200}
201
202static struct net_device *ipmr_reg_vif(void)
203{
204 struct net_device *dev;
205 struct in_device *in_dev;
206
207 dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
208 reg_vif_setup);
209
210 if (dev == NULL)
211 return NULL;
212
213 if (register_netdevice(dev)) {
214 free_netdev(dev);
215 return NULL;
216 }
217 dev->iflink = 0;
218
219 if ((in_dev = inetdev_init(dev)) == NULL)
220 goto failure;
221
222 in_dev->cnf.rp_filter = 0;
223
224 if (dev_open(dev))
225 goto failure;
226
227 return dev;
228
229failure:
230 /* allow the register to be completed before unregistering. */
231 rtnl_unlock();
232 rtnl_lock();
233
234 unregister_netdevice(dev);
235 return NULL;
236}
237#endif
238
239/*
240 * Delete a VIF entry
241 */
242
243static int vif_delete(int vifi)
244{
245 struct vif_device *v;
246 struct net_device *dev;
247 struct in_device *in_dev;
248
249 if (vifi < 0 || vifi >= maxvif)
250 return -EADDRNOTAVAIL;
251
252 v = &vif_table[vifi];
253
254 write_lock_bh(&mrt_lock);
255 dev = v->dev;
256 v->dev = NULL;
257
258 if (!dev) {
259 write_unlock_bh(&mrt_lock);
260 return -EADDRNOTAVAIL;
261 }
262
263#ifdef CONFIG_IP_PIMSM
264 if (vifi == reg_vif_num)
265 reg_vif_num = -1;
266#endif
267
268 if (vifi+1 == maxvif) {
269 int tmp;
270 for (tmp=vifi-1; tmp>=0; tmp--) {
271 if (VIF_EXISTS(tmp))
272 break;
273 }
274 maxvif = tmp+1;
275 }
276
277 write_unlock_bh(&mrt_lock);
278
279 dev_set_allmulti(dev, -1);
280
281 if ((in_dev = __in_dev_get(dev)) != NULL) {
282 in_dev->cnf.mc_forwarding--;
283 ip_rt_multicast_event(in_dev);
284 }
285
286 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
287 unregister_netdevice(dev);
288
289 dev_put(dev);
290 return 0;
291}
292
293/* Destroy an unresolved cache entry, killing queued skbs
294 and reporting error to netlink readers.
295 */
296
297static void ipmr_destroy_unres(struct mfc_cache *c)
298{
299 struct sk_buff *skb;
300
301 atomic_dec(&cache_resolve_queue_len);
302
303 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
304 if (skb->nh.iph->version == 0) {
305 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
306 nlh->nlmsg_type = NLMSG_ERROR;
307 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
308 skb_trim(skb, nlh->nlmsg_len);
309 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
310 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
311 } else
312 kfree_skb(skb);
313 }
314
315 kmem_cache_free(mrt_cachep, c);
316}
317
318
319/* Single timer process for all the unresolved queue. */
320
321static void ipmr_expire_process(unsigned long dummy)
322{
323 unsigned long now;
324 unsigned long expires;
325 struct mfc_cache *c, **cp;
326
327 if (!spin_trylock(&mfc_unres_lock)) {
328 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
329 return;
330 }
331
332 if (atomic_read(&cache_resolve_queue_len) == 0)
333 goto out;
334
335 now = jiffies;
336 expires = 10*HZ;
337 cp = &mfc_unres_queue;
338
339 while ((c=*cp) != NULL) {
340 if (time_after(c->mfc_un.unres.expires, now)) {
341 unsigned long interval = c->mfc_un.unres.expires - now;
342 if (interval < expires)
343 expires = interval;
344 cp = &c->next;
345 continue;
346 }
347
348 *cp = c->next;
349
350 ipmr_destroy_unres(c);
351 }
352
353 if (atomic_read(&cache_resolve_queue_len))
354 mod_timer(&ipmr_expire_timer, jiffies + expires);
355
356out:
357 spin_unlock(&mfc_unres_lock);
358}
359
360/* Fill oifs list. It is called under write locked mrt_lock. */
361
362static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
363{
364 int vifi;
365
366 cache->mfc_un.res.minvif = MAXVIFS;
367 cache->mfc_un.res.maxvif = 0;
368 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
369
370 for (vifi=0; vifi<maxvif; vifi++) {
371 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
372 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
373 if (cache->mfc_un.res.minvif > vifi)
374 cache->mfc_un.res.minvif = vifi;
375 if (cache->mfc_un.res.maxvif <= vifi)
376 cache->mfc_un.res.maxvif = vifi + 1;
377 }
378 }
379}
380
381static int vif_add(struct vifctl *vifc, int mrtsock)
382{
383 int vifi = vifc->vifc_vifi;
384 struct vif_device *v = &vif_table[vifi];
385 struct net_device *dev;
386 struct in_device *in_dev;
387
388 /* Is vif busy ? */
389 if (VIF_EXISTS(vifi))
390 return -EADDRINUSE;
391
392 switch (vifc->vifc_flags) {
393#ifdef CONFIG_IP_PIMSM
394 case VIFF_REGISTER:
395 /*
396 * Special Purpose VIF in PIM
397 * All the packets will be sent to the daemon
398 */
399 if (reg_vif_num >= 0)
400 return -EADDRINUSE;
401 dev = ipmr_reg_vif();
402 if (!dev)
403 return -ENOBUFS;
404 break;
405#endif
406 case VIFF_TUNNEL:
407 dev = ipmr_new_tunnel(vifc);
408 if (!dev)
409 return -ENOBUFS;
410 break;
411 case 0:
412 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
413 if (!dev)
414 return -EADDRNOTAVAIL;
415 __dev_put(dev);
416 break;
417 default:
418 return -EINVAL;
419 }
420
421 if ((in_dev = __in_dev_get(dev)) == NULL)
422 return -EADDRNOTAVAIL;
423 in_dev->cnf.mc_forwarding++;
424 dev_set_allmulti(dev, +1);
425 ip_rt_multicast_event(in_dev);
426
427 /*
428 * Fill in the VIF structures
429 */
430 v->rate_limit=vifc->vifc_rate_limit;
431 v->local=vifc->vifc_lcl_addr.s_addr;
432 v->remote=vifc->vifc_rmt_addr.s_addr;
433 v->flags=vifc->vifc_flags;
434 if (!mrtsock)
435 v->flags |= VIFF_STATIC;
436 v->threshold=vifc->vifc_threshold;
437 v->bytes_in = 0;
438 v->bytes_out = 0;
439 v->pkt_in = 0;
440 v->pkt_out = 0;
441 v->link = dev->ifindex;
442 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
443 v->link = dev->iflink;
444
445 /* And finish update writing critical data */
446 write_lock_bh(&mrt_lock);
447 dev_hold(dev);
448 v->dev=dev;
449#ifdef CONFIG_IP_PIMSM
450 if (v->flags&VIFF_REGISTER)
451 reg_vif_num = vifi;
452#endif
453 if (vifi+1 > maxvif)
454 maxvif = vifi+1;
455 write_unlock_bh(&mrt_lock);
456 return 0;
457}
458
459static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
460{
461 int line=MFC_HASH(mcastgrp,origin);
462 struct mfc_cache *c;
463
464 for (c=mfc_cache_array[line]; c; c = c->next) {
465 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
466 break;
467 }
468 return c;
469}
470
471/*
472 * Allocate a multicast cache entry
473 */
474static struct mfc_cache *ipmr_cache_alloc(void)
475{
476 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
477 if(c==NULL)
478 return NULL;
479 memset(c, 0, sizeof(*c));
480 c->mfc_un.res.minvif = MAXVIFS;
481 return c;
482}
483
484static struct mfc_cache *ipmr_cache_alloc_unres(void)
485{
486 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
487 if(c==NULL)
488 return NULL;
489 memset(c, 0, sizeof(*c));
490 skb_queue_head_init(&c->mfc_un.unres.unresolved);
491 c->mfc_un.unres.expires = jiffies + 10*HZ;
492 return c;
493}
494
495/*
496 * A cache entry has gone into a resolved state from queued
497 */
498
499static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
500{
501 struct sk_buff *skb;
502
503 /*
504 * Play the pending entries through our router
505 */
506
507 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
508 if (skb->nh.iph->version == 0) {
509 int err;
510 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
511
512 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
513 nlh->nlmsg_len = skb->tail - (u8*)nlh;
514 } else {
515 nlh->nlmsg_type = NLMSG_ERROR;
516 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
517 skb_trim(skb, nlh->nlmsg_len);
518 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
519 }
520 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
521 } else
522 ip_mr_forward(skb, c, 0);
523 }
524}
525
526/*
527 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
528 * expects the following bizarre scheme.
529 *
530 * Called under mrt_lock.
531 */
532
/*
 * Build a report about packet @pkt and queue it on mroute_socket.
 *
 * @pkt:    packet that triggered the report; it is not freed here.
 * @vifi:   vif index stored in the report (non-WHOLEPKT case).
 * @assert: message type (IGMPMSG_*), stored in im_msgtype.
 *
 * Returns the result of sock_queue_rcv_skb(), -ENOBUFS when no skb
 * could be allocated, or -EINVAL when no mroute socket is registered.
 * The report skb is freed here on every failure path.
 */
533static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
534{
535 struct sk_buff *skb;
 /* IP header length of the triggering packet, in bytes. */
536 int ihl = pkt->nh.iph->ihl<<2;
537 struct igmphdr *igmp;
538 struct igmpmsg *msg;
539 int ret;
540
 /* WHOLEPKT: copy the packet with room for one extra IP header in front;
  * otherwise a fresh 128-byte skb receives header-only data. */
541#ifdef CONFIG_IP_PIMSM
542 if (assert == IGMPMSG_WHOLEPKT)
543 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
544 else
545#endif
546 skb = alloc_skb(128, GFP_ATOMIC);
547
548 if(!skb)
549 return -ENOBUFS;
550
551#ifdef CONFIG_IP_PIMSM
552 if (assert == IGMPMSG_WHOLEPKT) {
553 /* Ugly, but we have no choice with this interface.
554 Duplicate old header, fix ihl, length etc.
555 And all this only to mangle msg->im_msgtype and
556 to set msg->im_mbz to "mbz" :-)
557 */
558 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
559 skb->nh.raw = skb->h.raw = (u8*)msg;
560 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
561 msg->im_msgtype = IGMPMSG_WHOLEPKT;
562 msg->im_mbz = 0;
563 msg->im_vif = reg_vif_num;
564 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
565 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
566 } else
567#endif
568 {
569
570 /*
571 * Copy the IP header
572 */
573
574 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
575 memcpy(skb->data,pkt->data,ihl);
576 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
 /* struct igmpmsg is overlaid on the copied IP header. */
577 msg = (struct igmpmsg*)skb->nh.iph;
578 msg->im_vif = vifi;
579 skb->dst = dst_clone(pkt->dst);
580
581 /*
582 * Add our header
583 */
584
585 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
 /* The same message type goes into both the IGMP header and the igmpmsg overlay. */
586 igmp->type =
587 msg->im_msgtype = assert;
588 igmp->code = 0;
589 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
590 skb->h.raw = skb->nh.raw;
591 }
592
 /* No daemon socket registered: nobody to deliver to. */
593 if (mroute_socket == NULL) {
594 kfree_skb(skb);
595 return -EINVAL;
596 }
597
598 /*
599 * Deliver to mrouted
600 */
601 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
602 if (net_ratelimit())
603 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
604 kfree_skb(skb);
605 }
606
607 return ret;
608}
609
610/*
611 * Queue a packet for resolution. It gets locked cache entry!
612 */
613
614static int
615ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
616{
617 int err;
618 struct mfc_cache *c;
619
620 spin_lock_bh(&mfc_unres_lock);
621 for (c=mfc_unres_queue; c; c=c->next) {
622 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
623 c->mfc_origin == skb->nh.iph->saddr)
624 break;
625 }
626
627 if (c == NULL) {
628 /*
629 * Create a new entry if allowable
630 */
631
632 if (atomic_read(&cache_resolve_queue_len)>=10 ||
633 (c=ipmr_cache_alloc_unres())==NULL) {
634 spin_unlock_bh(&mfc_unres_lock);
635
636 kfree_skb(skb);
637 return -ENOBUFS;
638 }
639
640 /*
641 * Fill in the new cache entry
642 */
643 c->mfc_parent=-1;
644 c->mfc_origin=skb->nh.iph->saddr;
645 c->mfc_mcastgrp=skb->nh.iph->daddr;
646
647 /*
648 * Reflect first query at mrouted.
649 */
650 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
651 /* If the report failed throw the cache entry
652 out - Brad Parker
653 */
654 spin_unlock_bh(&mfc_unres_lock);
655
656 kmem_cache_free(mrt_cachep, c);
657 kfree_skb(skb);
658 return err;
659 }
660
661 atomic_inc(&cache_resolve_queue_len);
662 c->next = mfc_unres_queue;
663 mfc_unres_queue = c;
664
665 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
666 }
667
668 /*
669 * See if we can append the packet
670 */
671 if (c->mfc_un.unres.unresolved.qlen>3) {
672 kfree_skb(skb);
673 err = -ENOBUFS;
674 } else {
675 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
676 err = 0;
677 }
678
679 spin_unlock_bh(&mfc_unres_lock);
680 return err;
681}
682
683/*
684 * MFC cache manipulation by user space mroute daemon
685 */
686
687static int ipmr_mfc_delete(struct mfcctl *mfc)
688{
689 int line;
690 struct mfc_cache *c, **cp;
691
692 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
693
694 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
695 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
696 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
697 write_lock_bh(&mrt_lock);
698 *cp = c->next;
699 write_unlock_bh(&mrt_lock);
700
701 kmem_cache_free(mrt_cachep, c);
702 return 0;
703 }
704 }
705 return -ENOENT;
706}
707
708static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
709{
710 int line;
711 struct mfc_cache *uc, *c, **cp;
712
713 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
714
715 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
716 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
717 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
718 break;
719 }
720
721 if (c != NULL) {
722 write_lock_bh(&mrt_lock);
723 c->mfc_parent = mfc->mfcc_parent;
724 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
725 if (!mrtsock)
726 c->mfc_flags |= MFC_STATIC;
727 write_unlock_bh(&mrt_lock);
728 return 0;
729 }
730
731 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
732 return -EINVAL;
733
734 c=ipmr_cache_alloc();
735 if (c==NULL)
736 return -ENOMEM;
737
738 c->mfc_origin=mfc->mfcc_origin.s_addr;
739 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
740 c->mfc_parent=mfc->mfcc_parent;
741 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
742 if (!mrtsock)
743 c->mfc_flags |= MFC_STATIC;
744
745 write_lock_bh(&mrt_lock);
746 c->next = mfc_cache_array[line];
747 mfc_cache_array[line] = c;
748 write_unlock_bh(&mrt_lock);
749
750 /*
751 * Check to see if we resolved a queued list. If so we
752 * need to send on the frames and tidy up.
753 */
754 spin_lock_bh(&mfc_unres_lock);
755 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
756 cp = &uc->next) {
757 if (uc->mfc_origin == c->mfc_origin &&
758 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
759 *cp = uc->next;
760 if (atomic_dec_and_test(&cache_resolve_queue_len))
761 del_timer(&ipmr_expire_timer);
762 break;
763 }
764 }
765 spin_unlock_bh(&mfc_unres_lock);
766
767 if (uc) {
768 ipmr_cache_resolve(uc, c);
769 kmem_cache_free(mrt_cachep, uc);
770 }
771 return 0;
772}
773
774/*
775 * Close the multicast socket, and clear the vif tables etc
776 */
777
778static void mroute_clean_tables(struct sock *sk)
779{
780 int i;
781
782 /*
783 * Shut down all active vif entries
784 */
785 for(i=0; i<maxvif; i++) {
786 if (!(vif_table[i].flags&VIFF_STATIC))
787 vif_delete(i);
788 }
789
790 /*
791 * Wipe the cache
792 */
793 for (i=0;i<MFC_LINES;i++) {
794 struct mfc_cache *c, **cp;
795
796 cp = &mfc_cache_array[i];
797 while ((c = *cp) != NULL) {
798 if (c->mfc_flags&MFC_STATIC) {
799 cp = &c->next;
800 continue;
801 }
802 write_lock_bh(&mrt_lock);
803 *cp = c->next;
804 write_unlock_bh(&mrt_lock);
805
806 kmem_cache_free(mrt_cachep, c);
807 }
808 }
809
810 if (atomic_read(&cache_resolve_queue_len) != 0) {
811 struct mfc_cache *c;
812
813 spin_lock_bh(&mfc_unres_lock);
814 while (mfc_unres_queue != NULL) {
815 c = mfc_unres_queue;
816 mfc_unres_queue = c->next;
817 spin_unlock_bh(&mfc_unres_lock);
818
819 ipmr_destroy_unres(c);
820
821 spin_lock_bh(&mfc_unres_lock);
822 }
823 spin_unlock_bh(&mfc_unres_lock);
824 }
825}
826
827static void mrtsock_destruct(struct sock *sk)
828{
829 rtnl_lock();
830 if (sk == mroute_socket) {
831 ipv4_devconf.mc_forwarding--;
832
833 write_lock_bh(&mrt_lock);
834 mroute_socket=NULL;
835 write_unlock_bh(&mrt_lock);
836
837 mroute_clean_tables(sk);
838 }
839 rtnl_unlock();
840}
841
842/*
843 * Socket options and virtual interface manipulation. The whole
844 * virtual interface system is a complete heap, but unfortunately
845 * that's how BSD mrouted happens to think. Maybe one day with a proper
846 * MOSPF/PIM router set up we can clean this up.
847 */
848
/*
 * setsockopt() entry point for the MRT_* multicast routing options.
 *
 * @sk:      socket the option is set on.
 * @optname: MRT_INIT, MRT_DONE, MRT_ADD_VIF, MRT_DEL_VIF, MRT_ADD_MFC,
 *           MRT_DEL_MFC, MRT_ASSERT or (PIM builds) MRT_PIM.
 * @optval:  user pointer to the option payload.
 * @optlen:  payload length as stated by the caller.
 *
 * Every option except MRT_INIT requires @sk to be the mroute socket or
 * the caller to hold CAP_NET_ADMIN.  Returns 0 or a negative errno;
 * unknown options yield -ENOPROTOOPT.
 */
849int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
850{
851 int ret;
852 struct vifctl vif;
853 struct mfcctl mfc;
854
855 if(optname!=MRT_INIT)
856 {
857 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
858 return -EACCES;
859 }
860
861 switch(optname)
862 {
 /* Register sk as the mroute socket; only a raw IGMP socket qualifies
  * and only one such socket may exist at a time. */
863 case MRT_INIT:
864 if (sk->sk_type != SOCK_RAW ||
865 inet_sk(sk)->num != IPPROTO_IGMP)
866 return -EOPNOTSUPP;
867 if(optlen!=sizeof(int))
868 return -ENOPROTOOPT;
869
870 rtnl_lock();
871 if (mroute_socket) {
872 rtnl_unlock();
873 return -EADDRINUSE;
874 }
875
 /* mrtsock_destruct is installed as the destructor callback. */
876 ret = ip_ra_control(sk, 1, mrtsock_destruct);
877 if (ret == 0) {
878 write_lock_bh(&mrt_lock);
879 mroute_socket=sk;
880 write_unlock_bh(&mrt_lock);
881
882 ipv4_devconf.mc_forwarding++;
883 }
884 rtnl_unlock();
885 return ret;
 /* Only the mroute socket itself may shut multicast routing down. */
886 case MRT_DONE:
887 if (sk!=mroute_socket)
888 return -EACCES;
889 return ip_ra_control(sk, 0, NULL);
890 case MRT_ADD_VIF:
891 case MRT_DEL_VIF:
892 if(optlen!=sizeof(vif))
893 return -EINVAL;
894 if (copy_from_user(&vif,optval,sizeof(vif)))
895 return -EFAULT;
 /* The vif index is bounded here before it is used to index vif_table. */
896 if(vif.vifc_vifi >= MAXVIFS)
897 return -ENFILE;
898 rtnl_lock();
899 if (optname==MRT_ADD_VIF) {
900 ret = vif_add(&vif, sk==mroute_socket);
901 } else {
902 ret = vif_delete(vif.vifc_vifi);
903 }
904 rtnl_unlock();
905 return ret;
906
907 /*
908 * Manipulate the forwarding caches. These live
909 * in a sort of kernel/user symbiosis.
910 */
911 case MRT_ADD_MFC:
912 case MRT_DEL_MFC:
913 if(optlen!=sizeof(mfc))
914 return -EINVAL;
915 if (copy_from_user(&mfc,optval, sizeof(mfc)))
916 return -EFAULT;
917 rtnl_lock();
918 if (optname==MRT_DEL_MFC)
919 ret = ipmr_mfc_delete(&mfc);
920 else
921 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
922 rtnl_unlock();
923 return ret;
924 /*
925 * Control PIM assert.
926 */
927 case MRT_ASSERT:
928 {
929 int v;
 /* NOTE(review): optlen is not checked before an int is read from optval. */
930 if(get_user(v,(int __user *)optval))
931 return -EFAULT;
932 mroute_do_assert=(v)?1:0;
933 return 0;
934 }
935#ifdef CONFIG_IP_PIMSM
936 case MRT_PIM:
937 {
 /* This 'ret' shadows the function-level one. */
938 int v, ret;
 /* NOTE(review): optlen is not checked before an int is read from optval. */
939 if(get_user(v,(int __user *)optval))
940 return -EFAULT;
941 v = (v)?1:0;
942 rtnl_lock();
943 ret = 0;
 /* Only act on an actual change; PIM mode also drives the assert flag. */
944 if (v != mroute_do_pim) {
945 mroute_do_pim = v;
946 mroute_do_assert = v;
947#ifdef CONFIG_IP_PIMSM_V2
948 if (mroute_do_pim)
949 ret = inet_add_protocol(&pim_protocol,
950 IPPROTO_PIM);
951 else
952 ret = inet_del_protocol(&pim_protocol,
953 IPPROTO_PIM);
 /* Any protocol (un)registration failure is reported as -EAGAIN;
  * the flags set above are not rolled back. */
954 if (ret < 0)
955 ret = -EAGAIN;
956#endif
957 }
958 rtnl_unlock();
959 return ret;
960 }
961#endif
962 /*
963 * Spurious command, or MRT_VERSION which you cannot
964 * set.
965 */
966 default:
967 return -ENOPROTOOPT;
968 }
969}
970
971/*
972 * Getsock opt support for the multicast routing system.
973 */
974
975int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
976{
977 int olr;
978 int val;
979
980 if(optname!=MRT_VERSION &&
981#ifdef CONFIG_IP_PIMSM
982 optname!=MRT_PIM &&
983#endif
984 optname!=MRT_ASSERT)
985 return -ENOPROTOOPT;
986
987 if (get_user(olr, optlen))
988 return -EFAULT;
989
990 olr = min_t(unsigned int, olr, sizeof(int));
991 if (olr < 0)
992 return -EINVAL;
993
994 if(put_user(olr,optlen))
995 return -EFAULT;
996 if(optname==MRT_VERSION)
997 val=0x0305;
998#ifdef CONFIG_IP_PIMSM
999 else if(optname==MRT_PIM)
1000 val=mroute_do_pim;
1001#endif
1002 else
1003 val=mroute_do_assert;
1004 if(copy_to_user(optval,&val,olr))
1005 return -EFAULT;
1006 return 0;
1007}
1008
1009/*
1010 * The IP multicast ioctl support routines.
1011 */
1012
1013int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1014{
1015 struct sioc_sg_req sr;
1016 struct sioc_vif_req vr;
1017 struct vif_device *vif;
1018 struct mfc_cache *c;
1019
1020 switch(cmd)
1021 {
1022 case SIOCGETVIFCNT:
1023 if (copy_from_user(&vr,arg,sizeof(vr)))
1024 return -EFAULT;
1025 if(vr.vifi>=maxvif)
1026 return -EINVAL;
1027 read_lock(&mrt_lock);
1028 vif=&vif_table[vr.vifi];
1029 if(VIF_EXISTS(vr.vifi)) {
1030 vr.icount=vif->pkt_in;
1031 vr.ocount=vif->pkt_out;
1032 vr.ibytes=vif->bytes_in;
1033 vr.obytes=vif->bytes_out;
1034 read_unlock(&mrt_lock);
1035
1036 if (copy_to_user(arg,&vr,sizeof(vr)))
1037 return -EFAULT;
1038 return 0;
1039 }
1040 read_unlock(&mrt_lock);
1041 return -EADDRNOTAVAIL;
1042 case SIOCGETSGCNT:
1043 if (copy_from_user(&sr,arg,sizeof(sr)))
1044 return -EFAULT;
1045
1046 read_lock(&mrt_lock);
1047 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1048 if (c) {
1049 sr.pktcnt = c->mfc_un.res.pkt;
1050 sr.bytecnt = c->mfc_un.res.bytes;
1051 sr.wrong_if = c->mfc_un.res.wrong_if;
1052 read_unlock(&mrt_lock);
1053
1054 if (copy_to_user(arg,&sr,sizeof(sr)))
1055 return -EFAULT;
1056 return 0;
1057 }
1058 read_unlock(&mrt_lock);
1059 return -EADDRNOTAVAIL;
1060 default:
1061 return -ENOIOCTLCMD;
1062 }
1063}
1064
1065
1066static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1067{
1068 struct vif_device *v;
1069 int ct;
1070 if (event != NETDEV_UNREGISTER)
1071 return NOTIFY_DONE;
1072 v=&vif_table[0];
1073 for(ct=0;ct<maxvif;ct++,v++) {
1074 if (v->dev==ptr)
1075 vif_delete(ct);
1076 }
1077 return NOTIFY_DONE;
1078}
1079
1080
/* Notifier block whose callback is ipmr_device_event(); its registration
 * is not visible in this part of the file. */
1081static struct notifier_block ip_mr_notifier={
1082 .notifier_call = ipmr_device_event,
1083};
1084
1085/*
1086 * Encapsulate a packet by attaching a valid IPIP header to it.
1087 * This avoids tunnel drivers and other mess and gives us the speed so
1088 * important for multicast video.
1089 */
1090
1091static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1092{
1093 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1094
1095 iph->version = 4;
1096 iph->tos = skb->nh.iph->tos;
1097 iph->ttl = skb->nh.iph->ttl;
1098 iph->frag_off = 0;
1099 iph->daddr = daddr;
1100 iph->saddr = saddr;
1101 iph->protocol = IPPROTO_IPIP;
1102 iph->ihl = 5;
1103 iph->tot_len = htons(skb->len);
1104 ip_select_ident(iph, skb->dst, NULL);
1105 ip_send_check(iph);
1106
1107 skb->h.ipiph = skb->nh.iph;
1108 skb->nh.iph = iph;
1109 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1110 nf_reset(skb);
1111}
1112
1113static inline int ipmr_forward_finish(struct sk_buff *skb)
1114{
1115 struct ip_options * opt = &(IPCB(skb)->opt);
1116
1117 IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1118
1119 if (unlikely(opt->optlen))
1120 ip_forward_options(skb);
1121
1122 return dst_output(skb);
1123}
1124
1125/*
1126 * Processing handlers for ipmr_forward
1127 */
1128
/*
 * Send @skb out through vif @vifi.
 *
 * @skb:  packet to transmit; it is consumed on every path (freed, or
 *        handed to the NF_IP_FORWARD hook).
 * @c:    cache entry of the flow; not referenced in the body.
 * @vifi: index into vif_table of the outgoing vif.
 *
 * Register vifs report the whole packet to the daemon instead of
 * transmitting; tunnel vifs get an IPIP header prepended.
 */
1129static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1130{
1131 struct iphdr *iph = skb->nh.iph;
1132 struct vif_device *vif = &vif_table[vifi];
1133 struct net_device *dev;
1134 struct rtable *rt;
 /* Extra header room needed in front of the packet; grows below. */
1135 int encap = 0;
1136
 /* The vif has no device attached (slot unused): drop. */
1137 if (vif->dev == NULL)
1138 goto out_free;
1139
1140#ifdef CONFIG_IP_PIMSM
 /* Register vif: account the packet and pass it to the daemon whole. */
1141 if (vif->flags & VIFF_REGISTER) {
1142 vif->pkt_out++;
1143 vif->bytes_out+=skb->len;
1144 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1145 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1146 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1147 kfree_skb(skb);
1148 return;
1149 }
1150#endif
1151
 /* Route lookup: towards the tunnel endpoint for tunnel vifs, otherwise
  * towards the packet's own destination, both bound to the vif's link. */
1152 if (vif->flags&VIFF_TUNNEL) {
1153 struct flowi fl = { .oif = vif->link,
1154 .nl_u = { .ip4_u =
1155 { .daddr = vif->remote,
1156 .saddr = vif->local,
1157 .tos = RT_TOS(iph->tos) } },
1158 .proto = IPPROTO_IPIP };
1159 if (ip_route_output_key(&rt, &fl))
1160 goto out_free;
1161 encap = sizeof(struct iphdr);
1162 } else {
1163 struct flowi fl = { .oif = vif->link,
1164 .nl_u = { .ip4_u =
1165 { .daddr = iph->daddr,
1166 .tos = RT_TOS(iph->tos) } },
1167 .proto = IPPROTO_IPIP };
1168 if (ip_route_output_key(&rt, &fl))
1169 goto out_free;
1170 }
1171
1172 dev = rt->u.dst.dev;
1173
1174 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1175 /* Do not fragment multicasts. Alas, IPv4 does not
1176 allow to send ICMP, so that packets will disappear
1177 to blackhole.
1178 */
1179
1180 IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
1181 ip_rt_put(rt);
1182 goto out_free;
1183 }
1184
1185 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1186
 /* Make sure the skb has 'encap' bytes of headroom before its headers are modified. */
1187 if (skb_cow(skb, encap)) {
1188 ip_rt_put(rt);
1189 goto out_free;
1190 }
1191
1192 vif->pkt_out++;
1193 vif->bytes_out+=skb->len;
1194
 /* Swap the skb's route for the one just looked up. */
1195 dst_release(skb->dst);
1196 skb->dst = &rt->u.dst;
 /* Re-read the header pointer after skb_cow(). */
1197 iph = skb->nh.iph;
1198 ip_decrease_ttl(iph);
1199
1200 /* FIXME: forward and output firewalls used to be called here.
1201 * What do we do with netfilter? -- RR */
1202 if (vif->flags & VIFF_TUNNEL) {
1203 ip_encap(skb, vif->local, vif->remote);
1204 /* FIXME: extra output firewall step used to be here. --RR */
1205 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1206 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
1207 }
1208
 /* Mark the packet as already forwarded. */
1209 IPCB(skb)->flags |= IPSKB_FORWARDED;
1210
1211 /*
1212 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1213 * not only before forwarding, but after forwarding on all output
1214 * interfaces. It is clear, if mrouter runs a multicasting
1215 * program, it should receive packets not depending to what interface
1216 * program is joined.
1217 * If we will not make it, the program will have to join on all
1218 * interfaces. On the other hand, multihoming host (or router, but
1219 * not mrouter) cannot join to more than one interface - it will
1220 * result in receiving multiple packets.
1221 */
1222 NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
1223 ipmr_forward_finish);
1224 return;
1225
1226out_free:
1227 kfree_skb(skb);
1228 return;
1229}
1230
1231static int ipmr_find_vif(struct net_device *dev)
1232{
1233 int ct;
1234 for (ct=maxvif-1; ct>=0; ct--) {
1235 if (vif_table[ct].dev == dev)
1236 break;
1237 }
1238 return ct;
1239}
1240
1241/* "local" means that we should preserve one skb (for local delivery) */
1242
/*
 * Forward @skb according to the resolved cache entry @cache.
 *
 * @skb:   packet to forward.
 * @cache: resolved entry for the packet's flow; its counters are updated.
 * @local: non-zero when the caller keeps @skb (only clones are sent);
 *         zero when @skb is consumed here.
 *
 * A packet that did not arrive on the entry's parent vif is not
 * forwarded; an IGMPMSG_WRONGVIF report may be sent instead.
 * Always returns 0.
 */
1243static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1244{
 /* Output vif whose transmission is still pending, or -1 for none. */
1245 int psend = -1;
1246 int vif, ct;
1247
1248 vif = cache->mfc_parent;
1249 cache->mfc_un.res.pkt++;
1250 cache->mfc_un.res.bytes += skb->len;
1251
1252 /*
1253 * Wrong interface: drop packet and (maybe) send PIM assert.
1254 */
 /* NOTE(review): vif_table[] is indexed by mfc_parent without a bounds
  * check here; this relies on the value stored when the entry was set up. */
1255 if (vif_table[vif].dev != skb->dev) {
1256 int true_vifi;
1257
1258 if (((struct rtable*)skb->dst)->fl.iif == 0) {
1259 /* It is our own packet, looped back.
1260 Very complicated situation...
1261
1262 The best workaround until routing daemons will be
1263 fixed is not to redistribute packet, if it was
1264 send through wrong interface. It means, that
1265 multicast applications WILL NOT work for
1266 (S,G), which have default multicast route pointing
1267 to wrong oif. In any case, it is not a good
1268 idea to use multicasting applications on router.
1269 */
1270 goto dont_forward;
1271 }
1272
1273 cache->mfc_un.res.wrong_if++;
1274 true_vifi = ipmr_find_vif(skb->dev);
1275
 /* Report only if asserts are enabled, the arrival vif is known, and
  * the last report for this entry is older than MFC_ASSERT_THRESH. */
1276 if (true_vifi >= 0 && mroute_do_assert &&
1277 /* pimsm uses asserts, when switching from RPT to SPT,
1278 so that we cannot check that packet arrived on an oif.
1279 It is bad, but otherwise we would need to move pretty
1280 large chunk of pimd to kernel. Ough... --ANK
1281 */
1282 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1283 time_after(jiffies,
1284 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1285 cache->mfc_un.res.last_assert = jiffies;
1286 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1287 }
1288 goto dont_forward;
1289 }
1290
1291 vif_table[vif].pkt_in++;
1292 vif_table[vif].bytes_in+=skb->len;
1293
1294 /*
1295 * Forward the frame
1296 */
 /* Each qualifying vif is remembered in psend and sent one step later
  * as a clone, so the last one can be handled separately below. */
1297 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1298 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1299 if (psend != -1) {
1300 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1301 if (skb2)
1302 ipmr_queue_xmit(skb2, cache, psend);
1303 }
1304 psend=ct;
1305 }
1306 }
 /* Last pending vif: send a clone if the caller keeps the skb,
  * otherwise hand over the original. */
1307 if (psend != -1) {
1308 if (local) {
1309 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1310 if (skb2)
1311 ipmr_queue_xmit(skb2, cache, psend);
1312 } else {
1313 ipmr_queue_xmit(skb, cache, psend);
1314 return 0;
1315 }
1316 }
1317
1318dont_forward:
 /* Nothing took ownership of the original skb; free it unless the caller keeps it. */
1319 if (!local)
1320 kfree_skb(skb);
1321 return 0;
1322}
1323
1324
1325/*
1326 * Multicast packets for forwarding arrive here
1327 */
1328
1329int ip_mr_input(struct sk_buff *skb)
1330{
1331 struct mfc_cache *cache;
1332 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1333
1334 /* Packet is looped back after forward, it should not be
1335 forwarded second time, but still can be delivered locally.
1336 */
1337 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1338 goto dont_forward;
1339
1340 if (!local) {
1341 if (IPCB(skb)->opt.router_alert) {
1342 if (ip_call_ra_chain(skb))
1343 return 0;
1344 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1345 /* IGMPv1 (and broken IGMPv2 implementations sort of
1346 Cisco IOS <= 11.2(8)) do not put router alert
1347 option to IGMP packets destined to routable
1348 groups. It is very bad, because it means
1349 that we can forward NO IGMP messages.
1350 */
1351 read_lock(&mrt_lock);
1352 if (mroute_socket) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001353 nf_reset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 raw_rcv(mroute_socket, skb);
1355 read_unlock(&mrt_lock);
1356 return 0;
1357 }
1358 read_unlock(&mrt_lock);
1359 }
1360 }
1361
1362 read_lock(&mrt_lock);
1363 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1364
1365 /*
1366 * No usable cache entry
1367 */
1368 if (cache==NULL) {
1369 int vif;
1370
1371 if (local) {
1372 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1373 ip_local_deliver(skb);
1374 if (skb2 == NULL) {
1375 read_unlock(&mrt_lock);
1376 return -ENOBUFS;
1377 }
1378 skb = skb2;
1379 }
1380
1381 vif = ipmr_find_vif(skb->dev);
1382 if (vif >= 0) {
1383 int err = ipmr_cache_unresolved(vif, skb);
1384 read_unlock(&mrt_lock);
1385
1386 return err;
1387 }
1388 read_unlock(&mrt_lock);
1389 kfree_skb(skb);
1390 return -ENODEV;
1391 }
1392
1393 ip_mr_forward(skb, cache, local);
1394
1395 read_unlock(&mrt_lock);
1396
1397 if (local)
1398 return ip_local_deliver(skb);
1399
1400 return 0;
1401
1402dont_forward:
1403 if (local)
1404 return ip_local_deliver(skb);
1405 kfree_skb(skb);
1406 return 0;
1407}
1408
1409#ifdef CONFIG_IP_PIMSM_V1
1410/*
1411 * Handle IGMP messages of PIMv1
1412 */
1413
1414int pim_rcv_v1(struct sk_buff * skb)
1415{
1416 struct igmphdr *pim;
1417 struct iphdr *encap;
1418 struct net_device *reg_dev = NULL;
1419
1420 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1421 goto drop;
1422
1423 pim = (struct igmphdr*)skb->h.raw;
1424
1425 if (!mroute_do_pim ||
1426 skb->len < sizeof(*pim) + sizeof(*encap) ||
1427 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1428 goto drop;
1429
1430 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1431 /*
1432 Check that:
1433 a. packet is really destinted to a multicast group
1434 b. packet is not a NULL-REGISTER
1435 c. packet is not truncated
1436 */
1437 if (!MULTICAST(encap->daddr) ||
1438 encap->tot_len == 0 ||
1439 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1440 goto drop;
1441
1442 read_lock(&mrt_lock);
1443 if (reg_vif_num >= 0)
1444 reg_dev = vif_table[reg_vif_num].dev;
1445 if (reg_dev)
1446 dev_hold(reg_dev);
1447 read_unlock(&mrt_lock);
1448
1449 if (reg_dev == NULL)
1450 goto drop;
1451
1452 skb->mac.raw = skb->nh.raw;
1453 skb_pull(skb, (u8*)encap - skb->data);
1454 skb->nh.iph = (struct iphdr *)skb->data;
1455 skb->dev = reg_dev;
1456 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1457 skb->protocol = htons(ETH_P_IP);
1458 skb->ip_summed = 0;
1459 skb->pkt_type = PACKET_HOST;
1460 dst_release(skb->dst);
1461 skb->dst = NULL;
1462 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1463 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1464 nf_reset(skb);
1465 netif_rx(skb);
1466 dev_put(reg_dev);
1467 return 0;
1468 drop:
1469 kfree_skb(skb);
1470 return 0;
1471}
1472#endif
1473
1474#ifdef CONFIG_IP_PIMSM_V2
1475static int pim_rcv(struct sk_buff * skb)
1476{
1477 struct pimreghdr *pim;
1478 struct iphdr *encap;
1479 struct net_device *reg_dev = NULL;
1480
1481 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1482 goto drop;
1483
1484 pim = (struct pimreghdr*)skb->h.raw;
1485 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1486 (pim->flags&PIM_NULL_REGISTER) ||
1487 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1488 (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1489 goto drop;
1490
1491 /* check if the inner packet is destined to mcast group */
1492 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1493 if (!MULTICAST(encap->daddr) ||
1494 encap->tot_len == 0 ||
1495 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1496 goto drop;
1497
1498 read_lock(&mrt_lock);
1499 if (reg_vif_num >= 0)
1500 reg_dev = vif_table[reg_vif_num].dev;
1501 if (reg_dev)
1502 dev_hold(reg_dev);
1503 read_unlock(&mrt_lock);
1504
1505 if (reg_dev == NULL)
1506 goto drop;
1507
1508 skb->mac.raw = skb->nh.raw;
1509 skb_pull(skb, (u8*)encap - skb->data);
1510 skb->nh.iph = (struct iphdr *)skb->data;
1511 skb->dev = reg_dev;
1512 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1513 skb->protocol = htons(ETH_P_IP);
1514 skb->ip_summed = 0;
1515 skb->pkt_type = PACKET_HOST;
1516 dst_release(skb->dst);
1517 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1518 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1519 skb->dst = NULL;
1520 nf_reset(skb);
1521 netif_rx(skb);
1522 dev_put(reg_dev);
1523 return 0;
1524 drop:
1525 kfree_skb(skb);
1526 return 0;
1527}
1528#endif
1529
1530static int
1531ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1532{
1533 int ct;
1534 struct rtnexthop *nhp;
1535 struct net_device *dev = vif_table[c->mfc_parent].dev;
1536 u8 *b = skb->tail;
1537 struct rtattr *mp_head;
1538
1539 if (dev)
1540 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1541
1542 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1543
1544 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1545 if (c->mfc_un.res.ttls[ct] < 255) {
1546 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1547 goto rtattr_failure;
1548 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1549 nhp->rtnh_flags = 0;
1550 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1551 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1552 nhp->rtnh_len = sizeof(*nhp);
1553 }
1554 }
1555 mp_head->rta_type = RTA_MULTIPATH;
1556 mp_head->rta_len = skb->tail - (u8*)mp_head;
1557 rtm->rtm_type = RTN_MULTICAST;
1558 return 1;
1559
1560rtattr_failure:
1561 skb_trim(skb, b - skb->data);
1562 return -EMSGSIZE;
1563}
1564
1565int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1566{
1567 int err;
1568 struct mfc_cache *cache;
1569 struct rtable *rt = (struct rtable*)skb->dst;
1570
1571 read_lock(&mrt_lock);
1572 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1573
1574 if (cache==NULL) {
1575 struct net_device *dev;
1576 int vif;
1577
1578 if (nowait) {
1579 read_unlock(&mrt_lock);
1580 return -EAGAIN;
1581 }
1582
1583 dev = skb->dev;
1584 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1585 read_unlock(&mrt_lock);
1586 return -ENODEV;
1587 }
1588 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1589 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1590 skb->nh.iph->saddr = rt->rt_src;
1591 skb->nh.iph->daddr = rt->rt_dst;
1592 skb->nh.iph->version = 0;
1593 err = ipmr_cache_unresolved(vif, skb);
1594 read_unlock(&mrt_lock);
1595 return err;
1596 }
1597
1598 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1599 cache->mfc_flags |= MFC_NOTIFY;
1600 err = ipmr_fill_mroute(skb, cache, rtm);
1601 read_unlock(&mrt_lock);
1602 return err;
1603}
1604
1605#ifdef CONFIG_PROC_FS
1606/*
1607 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1608 */
/* Per-open cursor for the vif seq_file listing. */
struct ipmr_vif_iter {
	int	ct;	/* vif_table index of the current position */
};
1612
1613static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1614 loff_t pos)
1615{
1616 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1617 if(!VIF_EXISTS(iter->ct))
1618 continue;
1619 if (pos-- == 0)
1620 return &vif_table[iter->ct];
1621 }
1622 return NULL;
1623}
1624
1625static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1626{
1627 read_lock(&mrt_lock);
1628 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1629 : SEQ_START_TOKEN;
1630}
1631
1632static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1633{
1634 struct ipmr_vif_iter *iter = seq->private;
1635
1636 ++*pos;
1637 if (v == SEQ_START_TOKEN)
1638 return ipmr_vif_seq_idx(iter, 0);
1639
1640 while (++iter->ct < maxvif) {
1641 if(!VIF_EXISTS(iter->ct))
1642 continue;
1643 return &vif_table[iter->ct];
1644 }
1645 return NULL;
1646}
1647
1648static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1649{
1650 read_unlock(&mrt_lock);
1651}
1652
1653static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1654{
1655 if (v == SEQ_START_TOKEN) {
1656 seq_puts(seq,
1657 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1658 } else {
1659 const struct vif_device *vif = v;
1660 const char *name = vif->dev ? vif->dev->name : "none";
1661
1662 seq_printf(seq,
1663 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1664 vif - vif_table,
1665 name, vif->bytes_in, vif->pkt_in,
1666 vif->bytes_out, vif->pkt_out,
1667 vif->flags, vif->local, vif->remote);
1668 }
1669 return 0;
1670}
1671
1672static struct seq_operations ipmr_vif_seq_ops = {
1673 .start = ipmr_vif_seq_start,
1674 .next = ipmr_vif_seq_next,
1675 .stop = ipmr_vif_seq_stop,
1676 .show = ipmr_vif_seq_show,
1677};
1678
1679static int ipmr_vif_open(struct inode *inode, struct file *file)
1680{
1681 struct seq_file *seq;
1682 int rc = -ENOMEM;
1683 struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1684
1685 if (!s)
1686 goto out;
1687
1688 rc = seq_open(file, &ipmr_vif_seq_ops);
1689 if (rc)
1690 goto out_kfree;
1691
1692 s->ct = 0;
1693 seq = file->private_data;
1694 seq->private = s;
1695out:
1696 return rc;
1697out_kfree:
1698 kfree(s);
1699 goto out;
1700
1701}
1702
1703static struct file_operations ipmr_vif_fops = {
1704 .owner = THIS_MODULE,
1705 .open = ipmr_vif_open,
1706 .read = seq_read,
1707 .llseek = seq_lseek,
1708 .release = seq_release_private,
1709};
1710
/*
 * Per-open cursor for the mfc cache seq_file listing.
 */
struct ipmr_mfc_iter {
	/*
	 * List currently being walked: mfc_cache_array, &mfc_unres_queue,
	 * or NULL when none.  ipmr_mfc_seq_stop() uses it to decide which
	 * lock has to be dropped.
	 */
	struct mfc_cache	**cache;
	int			ct;	/* hash line inside mfc_cache_array */
};
1715
1716
1717static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1718{
1719 struct mfc_cache *mfc;
1720
1721 it->cache = mfc_cache_array;
1722 read_lock(&mrt_lock);
1723 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1724 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1725 if (pos-- == 0)
1726 return mfc;
1727 read_unlock(&mrt_lock);
1728
1729 it->cache = &mfc_unres_queue;
1730 spin_lock_bh(&mfc_unres_lock);
1731 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1732 if (pos-- == 0)
1733 return mfc;
1734 spin_unlock_bh(&mfc_unres_lock);
1735
1736 it->cache = NULL;
1737 return NULL;
1738}
1739
1740
1741static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1742{
1743 struct ipmr_mfc_iter *it = seq->private;
1744 it->cache = NULL;
1745 it->ct = 0;
1746 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1747 : SEQ_START_TOKEN;
1748}
1749
1750static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1751{
1752 struct mfc_cache *mfc = v;
1753 struct ipmr_mfc_iter *it = seq->private;
1754
1755 ++*pos;
1756
1757 if (v == SEQ_START_TOKEN)
1758 return ipmr_mfc_seq_idx(seq->private, 0);
1759
1760 if (mfc->next)
1761 return mfc->next;
1762
1763 if (it->cache == &mfc_unres_queue)
1764 goto end_of_list;
1765
1766 BUG_ON(it->cache != mfc_cache_array);
1767
1768 while (++it->ct < MFC_LINES) {
1769 mfc = mfc_cache_array[it->ct];
1770 if (mfc)
1771 return mfc;
1772 }
1773
1774 /* exhausted cache_array, show unresolved */
1775 read_unlock(&mrt_lock);
1776 it->cache = &mfc_unres_queue;
1777 it->ct = 0;
1778
1779 spin_lock_bh(&mfc_unres_lock);
1780 mfc = mfc_unres_queue;
1781 if (mfc)
1782 return mfc;
1783
1784 end_of_list:
1785 spin_unlock_bh(&mfc_unres_lock);
1786 it->cache = NULL;
1787
1788 return NULL;
1789}
1790
1791static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1792{
1793 struct ipmr_mfc_iter *it = seq->private;
1794
1795 if (it->cache == &mfc_unres_queue)
1796 spin_unlock_bh(&mfc_unres_lock);
1797 else if (it->cache == mfc_cache_array)
1798 read_unlock(&mrt_lock);
1799}
1800
1801static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1802{
1803 int n;
1804
1805 if (v == SEQ_START_TOKEN) {
1806 seq_puts(seq,
1807 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1808 } else {
1809 const struct mfc_cache *mfc = v;
1810 const struct ipmr_mfc_iter *it = seq->private;
1811
1812 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1813 (unsigned long) mfc->mfc_mcastgrp,
1814 (unsigned long) mfc->mfc_origin,
1815 mfc->mfc_parent,
1816 mfc->mfc_un.res.pkt,
1817 mfc->mfc_un.res.bytes,
1818 mfc->mfc_un.res.wrong_if);
1819
1820 if (it->cache != &mfc_unres_queue) {
1821 for(n = mfc->mfc_un.res.minvif;
1822 n < mfc->mfc_un.res.maxvif; n++ ) {
1823 if(VIF_EXISTS(n)
1824 && mfc->mfc_un.res.ttls[n] < 255)
1825 seq_printf(seq,
1826 " %2d:%-3d",
1827 n, mfc->mfc_un.res.ttls[n]);
1828 }
1829 }
1830 seq_putc(seq, '\n');
1831 }
1832 return 0;
1833}
1834
1835static struct seq_operations ipmr_mfc_seq_ops = {
1836 .start = ipmr_mfc_seq_start,
1837 .next = ipmr_mfc_seq_next,
1838 .stop = ipmr_mfc_seq_stop,
1839 .show = ipmr_mfc_seq_show,
1840};
1841
1842static int ipmr_mfc_open(struct inode *inode, struct file *file)
1843{
1844 struct seq_file *seq;
1845 int rc = -ENOMEM;
1846 struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1847
1848 if (!s)
1849 goto out;
1850
1851 rc = seq_open(file, &ipmr_mfc_seq_ops);
1852 if (rc)
1853 goto out_kfree;
1854
1855 seq = file->private_data;
1856 seq->private = s;
1857out:
1858 return rc;
1859out_kfree:
1860 kfree(s);
1861 goto out;
1862
1863}
1864
1865static struct file_operations ipmr_mfc_fops = {
1866 .owner = THIS_MODULE,
1867 .open = ipmr_mfc_open,
1868 .read = seq_read,
1869 .llseek = seq_lseek,
1870 .release = seq_release_private,
1871};
1872#endif
1873
1874#ifdef CONFIG_IP_PIMSM_V2
1875static struct net_protocol pim_protocol = {
1876 .handler = pim_rcv,
1877};
1878#endif
1879
1880
1881/*
1882 * Setup for IP multicast routing
1883 */
1884
1885void __init ip_mr_init(void)
1886{
1887 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1888 sizeof(struct mfc_cache),
1889 0, SLAB_HWCACHE_ALIGN,
1890 NULL, NULL);
1891 if (!mrt_cachep)
1892 panic("cannot allocate ip_mrt_cache");
1893
1894 init_timer(&ipmr_expire_timer);
1895 ipmr_expire_timer.function=ipmr_expire_process;
1896 register_netdevice_notifier(&ip_mr_notifier);
1897#ifdef CONFIG_PROC_FS
1898 proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1899 proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1900#endif
1901}