/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;				/* Set in PIM assert	*/
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	dev_close(dev);

	dev = __dev_get_by_name(&init_net, "tunl0");
	if (dev) {
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
		set_fs(oldfs);
	}
}

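/*
 *	ipmr_new_tunnel() builds a DVMRP-style IPIP tunnel for a VIFF_TUNNEL
 *	vif: it asks the generic "tunl0" device (via SIOCADDTUNNEL under
 *	KERNEL_DS) to create a "dvmrp%d" tunnel between the local and remote
 *	addresses taken from the vifctl, then marks the new device
 *	IFF_MULTICAST, disables rp_filter on it and brings it up.
 */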
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(&init_net, "tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t oldfs;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

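/*
 *	reg_vif_xmit() is the hard_start_xmit handler of the "pimreg" device.
 *	Nothing is ever put on a wire here: the whole packet is bounced up to
 *	the PIM daemon as an IGMPMSG_WHOLEPKT report (so it can be wrapped in
 *	a PIM Register), the device statistics are updated and the skb freed.
 */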
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->destructor		= free_netdev;
}

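/*
 *	ipmr_reg_vif() allocates and registers the "pimreg" pseudo device used
 *	by the VIFF_REGISTER vif, turns rp_filter off on it and opens it.  On
 *	failure after register_netdevice() the rtnl lock is briefly dropped so
 *	the pending registration can complete before the device is torn down.
 */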
static struct net_device *ipmr_reg_vif(void)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

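/*
 *	ipmr_expire_process() runs off ipmr_expire_timer.  It walks the
 *	unresolved queue, destroys every entry whose resolution window has
 *	passed, and re-arms the timer for the earliest remaining expiry.  If
 *	the queue lock is contended it simply retries a tenth of a second
 *	later rather than spinning.
 */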
static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

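/*
 *	vif_add() instantiates one of three kinds of vif on behalf of user
 *	space: the single VIFF_REGISTER "pimreg" device, a VIFF_TUNNEL DVMRP
 *	tunnel, or (flags == 0) an existing interface identified by its local
 *	address.  The device is switched to allmulti, MC_FORWARDING is bumped
 *	on it, and the vif_table slot is filled in under mrt_lock.
 */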
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif();
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		dev_put(dev);
		err = dev_set_allmulti(dev, 1);
		if (err)
			return err;
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
		return -EADDRNOTAVAIL;
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
	int line=MFC_HASH(mcastgrp,origin);
	struct mfc_cache *c;

	for (c=mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c==NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c==NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

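/*
 *	Two report formats are produced below: IGMPMSG_WHOLEPKT reports carry
 *	a copy of the complete packet for PIM Register processing, while the
 *	other reports (e.g. IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF) carry only the
 *	original IP header followed by a fake igmphdr whose type encodes the
 *	report reason.  Either way the buffer is queued on mroute_socket for
 *	the user-space daemon to read.
 */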
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type =
	msg->im_msgtype = assert;
	igmp->code = 0;
	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

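/*
 *	ipmr_mfc_add() either updates the parent/ttl vector of an existing
 *	(S,G) entry or inserts a new one at the head of its hash chain.  If
 *	an unresolved entry for the same (S,G) was queued, it is removed from
 *	the unresolved queue and its pending skbs are played through
 *	ipmr_cache_resolve().
 */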
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

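/*
 *	As a rough sketch (not part of this file), a user-space routing
 *	daemon drives this interface over a raw IGMP socket roughly as
 *	follows; the vif and (S,G) values are illustrative placeholders only:
 *
 *		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *		int v = 1;
 *		struct vifctl vc;
 *		struct mfcctl mc;
 *
 *		setsockopt(fd, IPPROTO_IP, MRT_INIT, &v, sizeof(v));
 *
 *		memset(&vc, 0, sizeof(vc));
 *		vc.vifc_vifi = 0;			(vif index)
 *		vc.vifc_threshold = 1;
 *		vc.vifc_lcl_addr.s_addr = local_addr;	(address of a local interface)
 *		setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *		memset(&mc, 0, sizeof(mc));
 *		mc.mfcc_origin.s_addr = source;		(S)
 *		mc.mfcc_mcastgrp.s_addr = group;	(G)
 *		mc.mfcc_parent = 0;			(incoming vif)
 *		mc.mfcc_ttls[1] = 1;			(forward to vif 1)
 *		setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *		setsockopt(fd, IPPROTO_IP, MRT_DONE, NULL, 0);
 *
 *	The IGMPMSG_NOCACHE/WRONGVIF/WHOLEPKT upcalls built in
 *	ipmr_cache_report() arrive on the same socket via the normal raw
 *	socket receive path.
 */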
int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen!=sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mroute_socket) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mroute_socket=sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk!=mroute_socket)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen!=sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif,optval,sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname==MRT_ADD_VIF) {
			ret = vif_add(&vif, sk==mroute_socket);
		} else {
			ret = vif_delete(vif.vifc_vifi);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen!=sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc,optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname==MRT_DEL_MFC)
			ret = ipmr_mfc_delete(&mfc);
		else
			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		mroute_do_assert=(v)?1:0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mroute_do_pim) {
			mroute_do_pim = v;
			mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
			if (mroute_do_pim)
				ret = inet_add_protocol(&pim_protocol,
							IPPROTO_PIM);
			else
				ret = inet_del_protocol(&pim_protocol,
							IPPROTO_PIM);
			if (ret < 0)
				ret = -EAGAIN;
#endif
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
	int olr;
	int val;

	if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname!=MRT_PIM &&
#endif
	    optname!=MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr,optlen))
		return -EFAULT;
	if (optname==MRT_VERSION)
		val=0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname==MRT_PIM)
		val=mroute_do_pim;
#endif
	else
		val=mroute_do_assert;
	if (copy_to_user(optval,&val,olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr,arg,sizeof(vr)))
			return -EFAULT;
		if (vr.vifi>=maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif=&vif_table[vr.vifi];
		if (VIF_EXISTS(vr.vifi)) {
			vr.icount=vif->pkt_in;
			vr.ocount=vif->pkt_out;
			vr.ibytes=vif->bytes_in;
			vr.obytes=vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&vr,sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr,arg,sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg,&sr,sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


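/*
 *	ipmr_device_event() is the netdevice notifier callback: when a device
 *	in the initial namespace is unregistered, every vif still bound to it
 *	is deleted so the vif table never points at a dead device.
 */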
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct vif_device *v;
	int ct;

	if (dev_net(dev) != &init_net)
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v=&vif_table[0];
	for (ct=0;ct<maxvif;ct++,v++) {
		if (v->dev==dev)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

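/*
 *	ipmr_queue_xmit() transmits one copy of the packet on one vif.  A
 *	register vif simply reports the packet to the daemon; otherwise a
 *	route towards the vif's destination is looked up, DF-marked packets
 *	that would not fit are dropped (no ICMP is generated for multicast),
 *	the TTL is decremented, tunnel vifs get an IPIP header via ip_encap(),
 *	and the skb is finally passed through the NF_INET_FORWARD hook.
 */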
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		kfree_skb(skb);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow us to send ICMP here, so such packets will
		   simply disappear into a black hole.
		 */

		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces.  Clearly, if the mrouter runs a multicast program, that
	 * program should receive packets regardless of which interface it
	 * joined on.
	 * If we do not do this, the program will have to join on all
	 * interfaces.  On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple copies of each packet.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct=maxvif-1; ct>=0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb->rtable->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicast applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       It is bad, but otherwise we would need to move a pretty
		       large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend=ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

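/*
 *	ip_mr_input() is the entry point for multicast packets handed over by
 *	the IPv4 receive path.  IGMP sent without the router-alert option is
 *	passed straight to the daemon's socket, packets with a resolved (S,G)
 *	entry go through ip_mr_forward(), unresolved ones are queued via
 *	ipmr_cache_unresolved(), and locally destined copies are still
 *	delivered with ip_local_deliver().
 */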
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = skb->rtable->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
			/* IGMPv1 (and broken IGMPv2 implementations such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option in IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mroute_socket) {
				nf_reset(skb);
				raw_rcv(mroute_socket, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct iphdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

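/*
 *	pim_rcv() below is the PIMv2 counterpart: it validates the Register
 *	header (type/flags and a checksum that, per the note in the file
 *	header, may cover either just the PIM header or the whole packet),
 *	then strips the outer headers and feeds the encapsulated multicast
 *	packet back into the stack through the "pimreg" device.
 */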
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct iphdr *encap;
	struct net_device *reg_dev = NULL;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr *)(skb_transport_header(skb) +
				 sizeof(struct pimreghdr));
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
		goto drop;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

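/*
 *	ipmr_get_route() fills an rtnetlink route reply for a multicast
 *	destination.  If no (S,G) cache entry exists yet (and the caller can
 *	sleep), a clone of the skb is given a pseudo IP header with version 0
 *	and queued via ipmr_cache_unresolved(); the netlink answer is then
 *	completed from ipmr_cache_resolve() once the daemon installs the
 *	route.
 */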
1622int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1623{
1624 int err;
1625 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001626 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627
1628 read_lock(&mrt_lock);
1629 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1630
1631 if (cache==NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001632 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001633 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 struct net_device *dev;
1635 int vif;
1636
1637 if (nowait) {
1638 read_unlock(&mrt_lock);
1639 return -EAGAIN;
1640 }
1641
1642 dev = skb->dev;
1643 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1644 read_unlock(&mrt_lock);
1645 return -ENODEV;
1646 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001647 skb2 = skb_clone(skb, GFP_ATOMIC);
1648 if (!skb2) {
1649 read_unlock(&mrt_lock);
1650 return -ENOMEM;
1651 }
1652
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001653 skb_push(skb2, sizeof(struct iphdr));
1654 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001655 iph = ip_hdr(skb2);
1656 iph->ihl = sizeof(struct iphdr) >> 2;
1657 iph->saddr = rt->rt_src;
1658 iph->daddr = rt->rt_dst;
1659 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001660 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 read_unlock(&mrt_lock);
1662 return err;
1663 }
1664
1665	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1666 cache->mfc_flags |= MFC_NOTIFY;
1667 err = ipmr_fill_mroute(skb, cache, rtm);
1668 read_unlock(&mrt_lock);
1669 return err;
1670}
1671
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001672#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673/*
1674 * The /proc interfaces to multicast routing, /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1675 */
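/*
 *	Purely illustrative /proc/net/ip_mr_vif output (the numbers are
 *	invented; the columns are exactly what ipmr_vif_seq_show() below
 *	prints):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0          31337      42     12345      24 00000 C0A80001 00000000
 */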
1676struct ipmr_vif_iter {
1677 int ct;
1678};
1679
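/* Return the pos'th vif that is actually in use, skipping free slots. */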
1680static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1681 loff_t pos)
1682{
1683 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001684 if (!VIF_EXISTS(iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001686 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 return &vif_table[iter->ct];
1688 }
1689 return NULL;
1690}
1691
1692static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001693 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694{
1695 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001696 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 : SEQ_START_TOKEN;
1698}
1699
1700static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1701{
1702 struct ipmr_vif_iter *iter = seq->private;
1703
1704 ++*pos;
1705 if (v == SEQ_START_TOKEN)
1706 return ipmr_vif_seq_idx(iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001707
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 while (++iter->ct < maxvif) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001709 if (!VIF_EXISTS(iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 continue;
1711 return &vif_table[iter->ct];
1712 }
1713 return NULL;
1714}
1715
1716static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001717 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718{
1719 read_unlock(&mrt_lock);
1720}
1721
1722static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1723{
1724 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001725 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1727 } else {
1728 const struct vif_device *vif = v;
1729 const char *name = vif->dev ? vif->dev->name : "none";
1730
1731 seq_printf(seq,
1732 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1733 vif - vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001734 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 vif->bytes_out, vif->pkt_out,
1736 vif->flags, vif->local, vif->remote);
1737 }
1738 return 0;
1739}
1740
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001741static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 .start = ipmr_vif_seq_start,
1743 .next = ipmr_vif_seq_next,
1744 .stop = ipmr_vif_seq_stop,
1745 .show = ipmr_vif_seq_show,
1746};
1747
1748static int ipmr_vif_open(struct inode *inode, struct file *file)
1749{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001750 return seq_open_private(file, &ipmr_vif_seq_ops,
1751 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752}
1753
Arjan van de Ven9a321442007-02-12 00:55:35 -08001754static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 .owner = THIS_MODULE,
1756 .open = ipmr_vif_open,
1757 .read = seq_read,
1758 .llseek = seq_lseek,
1759 .release = seq_release_private,
1760};
1761
1762struct ipmr_mfc_iter {
1763 struct mfc_cache **cache;
1764 int ct;
1765};
1766
1767
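/*
 *	The MFC iterator walks the resolved hash table under mrt_lock first
 *	and then the queue of unresolved entries under mfc_unres_lock;
 *	it->cache records which of the two lists we are in so that seq_stop()
 *	can drop the matching lock.
 */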
1768static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1769{
1770 struct mfc_cache *mfc;
1771
1772 it->cache = mfc_cache_array;
1773 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001774 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001775 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001776 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 return mfc;
1778 read_unlock(&mrt_lock);
1779
1780 it->cache = &mfc_unres_queue;
1781 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001782 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 if (pos-- == 0)
1784 return mfc;
1785 spin_unlock_bh(&mfc_unres_lock);
1786
1787 it->cache = NULL;
1788 return NULL;
1789}
1790
1791
1792static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1793{
1794 struct ipmr_mfc_iter *it = seq->private;
1795 it->cache = NULL;
1796 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001797 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 : SEQ_START_TOKEN;
1799}
1800
1801static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1802{
1803 struct mfc_cache *mfc = v;
1804 struct ipmr_mfc_iter *it = seq->private;
1805
1806 ++*pos;
1807
1808 if (v == SEQ_START_TOKEN)
1809 return ipmr_mfc_seq_idx(seq->private, 0);
1810
1811 if (mfc->next)
1812 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001813
1814 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 goto end_of_list;
1816
1817 BUG_ON(it->cache != mfc_cache_array);
1818
1819 while (++it->ct < MFC_LINES) {
1820 mfc = mfc_cache_array[it->ct];
1821 if (mfc)
1822 return mfc;
1823 }
1824
1825 /* exhausted cache_array, show unresolved */
1826 read_unlock(&mrt_lock);
1827 it->cache = &mfc_unres_queue;
1828 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001829
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 spin_lock_bh(&mfc_unres_lock);
1831 mfc = mfc_unres_queue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001832 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 return mfc;
1834
1835 end_of_list:
1836 spin_unlock_bh(&mfc_unres_lock);
1837 it->cache = NULL;
1838
1839 return NULL;
1840}
1841
1842static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1843{
1844 struct ipmr_mfc_iter *it = seq->private;
1845
1846 if (it->cache == &mfc_unres_queue)
1847 spin_unlock_bh(&mfc_unres_lock);
1848 else if (it->cache == mfc_cache_array)
1849 read_unlock(&mrt_lock);
1850}
1851
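/*
 *	One line per (origin, group) pair.  A purely illustrative
 *	/proc/net/ip_mr_cache entry (invented numbers, layout taken from the
 *	seq_printf() calls below):
 *
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	E00000FB C0A80001   1       42     6300        0  2:1
 */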
1852static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1853{
1854 int n;
1855
1856 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001857 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1859 } else {
1860 const struct mfc_cache *mfc = v;
1861 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001862
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1864 (unsigned long) mfc->mfc_mcastgrp,
1865 (unsigned long) mfc->mfc_origin,
1866 mfc->mfc_parent,
1867 mfc->mfc_un.res.pkt,
1868 mfc->mfc_un.res.bytes,
1869 mfc->mfc_un.res.wrong_if);
1870
1871 if (it->cache != &mfc_unres_queue) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001872 for (n = mfc->mfc_un.res.minvif;
1873			     n < mfc->mfc_un.res.maxvif; n++) {
1874 if (VIF_EXISTS(n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 && mfc->mfc_un.res.ttls[n] < 255)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001876 seq_printf(seq,
1877 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 n, mfc->mfc_un.res.ttls[n]);
1879 }
1880 }
1881 seq_putc(seq, '\n');
1882 }
1883 return 0;
1884}
1885
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001886static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 .start = ipmr_mfc_seq_start,
1888 .next = ipmr_mfc_seq_next,
1889 .stop = ipmr_mfc_seq_stop,
1890 .show = ipmr_mfc_seq_show,
1891};
1892
1893static int ipmr_mfc_open(struct inode *inode, struct file *file)
1894{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001895 return seq_open_private(file, &ipmr_mfc_seq_ops,
1896 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897}
1898
Arjan van de Ven9a321442007-02-12 00:55:35 -08001899static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 .owner = THIS_MODULE,
1901 .open = ipmr_mfc_open,
1902 .read = seq_read,
1903 .llseek = seq_lseek,
1904 .release = seq_release_private,
1905};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001906#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907
1908#ifdef CONFIG_IP_PIMSM_V2
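/* Protocol hook through which incoming IPPROTO_PIM packets reach pim_rcv(). */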
1909static struct net_protocol pim_protocol = {
1910 .handler = pim_rcv,
1911};
1912#endif
1913
1914
1915/*
1916 * Setup for IP multicast routing: create the mfc_cache slab, initialise
 * the cache expiry timer, register the netdevice notifier and, under
 * CONFIG_PROC_FS, create the /proc/net/ip_mr_vif and ip_mr_cache files.
1917 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001918
Wang Chen03d2f892008-07-03 12:13:36 +08001919int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920{
Wang Chen03d2f892008-07-03 12:13:36 +08001921 int err;
1922
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1924 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07001925 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09001926 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08001927 if (!mrt_cachep)
1928 return -ENOMEM;
1929
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08001930 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08001931 err = register_netdevice_notifier(&ip_mr_notifier);
1932 if (err)
1933 goto reg_notif_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001934#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001935 err = -ENOMEM;
1936 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1937 goto proc_vif_fail;
1938 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1939 goto proc_cache_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001940#endif
Wang Chen03d2f892008-07-03 12:13:36 +08001941 return 0;
	/*
	 * Unwind in the reverse order of setup, so that each label only
	 * undoes the steps that had already succeeded before the failure.
	 */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(&init_net, "ip_mr_vif");
proc_vif_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);	/* harmless if the timer was never armed */
	kmem_cache_destroy(mrt_cachep);
	return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951}