blob: b42e082cc17048ddcdee7103f933c6123c663111 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
70static struct sock *mroute_socket;
71
72
73/* Big lock, protecting vif table, mrt cache and mroute socket state.
74 Note that the changes are semaphored via rtnl_lock.
75 */
76
77static DEFINE_RWLOCK(mrt_lock);
78
79/*
80 * Multicast router control variables
81 */
82
83static struct vif_device vif_table[MAXVIFS]; /* Devices */
84static int maxvif;
85
86#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
87
88static int mroute_do_assert; /* Set in PIM assert */
89static int mroute_do_pim;
90
91static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
92
93static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
94static atomic_t cache_resolve_queue_len; /* Size of unresolved */
95
96/* Special spinlock for queue of unresolved entries */
97static DEFINE_SPINLOCK(mfc_unres_lock);
98
99/* We return to original Alan's scheme. Hash table of resolved
100 entries is changed only in process context and protected
101 with weak lock mrt_lock. Queue of unresolved entries is protected
102 with strong spinlock mfc_unres_lock.
103
104 In this case data path is free of exclusive locks at all.
105 */
106
Christoph Lametere18b8902006-12-06 20:33:20 -0800107static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
109static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
110static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
111static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
112
113#ifdef CONFIG_IP_PIMSM_V2
114static struct net_protocol pim_protocol;
115#endif
116
117static struct timer_list ipmr_expire_timer;
118
119/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
120
Wang Chend6070322008-07-14 20:55:26 -0700121static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
122{
123 dev_close(dev);
124
125 dev = __dev_get_by_name(&init_net, "tunl0");
126 if (dev) {
127 struct ifreq ifr;
128 mm_segment_t oldfs;
129 struct ip_tunnel_parm p;
130
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
139
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
142 set_fs(oldfs);
143 }
144}
145
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146static
147struct net_device *ipmr_new_tunnel(struct vifctl *v)
148{
149 struct net_device *dev;
150
Eric W. Biederman881d9662007-09-17 11:56:21 -0700151 dev = __dev_get_by_name(&init_net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
153 if (dev) {
154 int err;
155 struct ifreq ifr;
156 mm_segment_t oldfs;
157 struct ip_tunnel_parm p;
158 struct in_device *in_dev;
159
160 memset(&p, 0, sizeof(p));
161 p.iph.daddr = v->vifc_rmt_addr.s_addr;
162 p.iph.saddr = v->vifc_lcl_addr.s_addr;
163 p.iph.version = 4;
164 p.iph.ihl = 5;
165 p.iph.protocol = IPPROTO_IPIP;
166 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800167 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
169 oldfs = get_fs(); set_fs(KERNEL_DS);
170 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
171 set_fs(oldfs);
172
173 dev = NULL;
174
Eric W. Biederman881d9662007-09-17 11:56:21 -0700175 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 dev->flags |= IFF_MULTICAST;
177
Herbert Xue5ed6392005-10-03 14:35:55 -0700178 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700179 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700181
182 ipv4_devconf_setall(in_dev);
183 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184
185 if (dev_open(dev))
186 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700187 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 }
189 }
190 return dev;
191
192failure:
193 /* allow the register to be completed before unregistering. */
194 rtnl_unlock();
195 rtnl_lock();
196
197 unregister_netdevice(dev);
198 return NULL;
199}
200
201#ifdef CONFIG_IP_PIMSM
202
203static int reg_vif_num = -1;
204
205static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
206{
207 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700208 dev->stats.tx_bytes += skb->len;
209 dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
211 read_unlock(&mrt_lock);
212 kfree_skb(skb);
213 return 0;
214}
215
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216static void reg_vif_setup(struct net_device *dev)
217{
218 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800219 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 dev->flags = IFF_NOARP;
221 dev->hard_start_xmit = reg_vif_xmit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 dev->destructor = free_netdev;
223}
224
225static struct net_device *ipmr_reg_vif(void)
226{
227 struct net_device *dev;
228 struct in_device *in_dev;
229
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700230 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
232 if (dev == NULL)
233 return NULL;
234
235 if (register_netdevice(dev)) {
236 free_netdev(dev);
237 return NULL;
238 }
239 dev->iflink = 0;
240
Herbert Xu71e27da2007-06-04 23:36:06 -0700241 rcu_read_lock();
242 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
243 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700245 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
Herbert Xu71e27da2007-06-04 23:36:06 -0700247 ipv4_devconf_setall(in_dev);
248 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
249 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250
251 if (dev_open(dev))
252 goto failure;
253
Wang Chen7dc00c82008-07-14 20:56:34 -0700254 dev_hold(dev);
255
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 return dev;
257
258failure:
259 /* allow the register to be completed before unregistering. */
260 rtnl_unlock();
261 rtnl_lock();
262
263 unregister_netdevice(dev);
264 return NULL;
265}
266#endif
267
268/*
269 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700270 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900272
Wang Chen7dc00c82008-07-14 20:56:34 -0700273static int vif_delete(int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274{
275 struct vif_device *v;
276 struct net_device *dev;
277 struct in_device *in_dev;
278
279 if (vifi < 0 || vifi >= maxvif)
280 return -EADDRNOTAVAIL;
281
282 v = &vif_table[vifi];
283
284 write_lock_bh(&mrt_lock);
285 dev = v->dev;
286 v->dev = NULL;
287
288 if (!dev) {
289 write_unlock_bh(&mrt_lock);
290 return -EADDRNOTAVAIL;
291 }
292
293#ifdef CONFIG_IP_PIMSM
294 if (vifi == reg_vif_num)
295 reg_vif_num = -1;
296#endif
297
298 if (vifi+1 == maxvif) {
299 int tmp;
300 for (tmp=vifi-1; tmp>=0; tmp--) {
301 if (VIF_EXISTS(tmp))
302 break;
303 }
304 maxvif = tmp+1;
305 }
306
307 write_unlock_bh(&mrt_lock);
308
309 dev_set_allmulti(dev, -1);
310
Herbert Xue5ed6392005-10-03 14:35:55 -0700311 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700312 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 ip_rt_multicast_event(in_dev);
314 }
315
Wang Chen7dc00c82008-07-14 20:56:34 -0700316 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 unregister_netdevice(dev);
318
319 dev_put(dev);
320 return 0;
321}
322
323/* Destroy an unresolved cache entry, killing queued skbs
324 and reporting error to netlink readers.
325 */
326
327static void ipmr_destroy_unres(struct mfc_cache *c)
328{
329 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700330 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331
332 atomic_dec(&cache_resolve_queue_len);
333
Stephen Hemminger132adf52007-03-08 20:44:43 -0800334 while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700335 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
337 nlh->nlmsg_type = NLMSG_ERROR;
338 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
339 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700340 e = NLMSG_DATA(nlh);
341 e->error = -ETIMEDOUT;
342 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700343
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800344 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 } else
346 kfree_skb(skb);
347 }
348
349 kmem_cache_free(mrt_cachep, c);
350}
351
352
353/* Single timer process for all the unresolved queue. */
354
355static void ipmr_expire_process(unsigned long dummy)
356{
357 unsigned long now;
358 unsigned long expires;
359 struct mfc_cache *c, **cp;
360
361 if (!spin_trylock(&mfc_unres_lock)) {
362 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
363 return;
364 }
365
366 if (atomic_read(&cache_resolve_queue_len) == 0)
367 goto out;
368
369 now = jiffies;
370 expires = 10*HZ;
371 cp = &mfc_unres_queue;
372
373 while ((c=*cp) != NULL) {
374 if (time_after(c->mfc_un.unres.expires, now)) {
375 unsigned long interval = c->mfc_un.unres.expires - now;
376 if (interval < expires)
377 expires = interval;
378 cp = &c->next;
379 continue;
380 }
381
382 *cp = c->next;
383
384 ipmr_destroy_unres(c);
385 }
386
387 if (atomic_read(&cache_resolve_queue_len))
388 mod_timer(&ipmr_expire_timer, jiffies + expires);
389
390out:
391 spin_unlock(&mfc_unres_lock);
392}
393
394/* Fill oifs list. It is called under write locked mrt_lock. */
395
Baruch Evend1b04c02005-07-30 17:41:59 -0700396static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397{
398 int vifi;
399
400 cache->mfc_un.res.minvif = MAXVIFS;
401 cache->mfc_un.res.maxvif = 0;
402 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
403
404 for (vifi=0; vifi<maxvif; vifi++) {
405 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
406 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
407 if (cache->mfc_un.res.minvif > vifi)
408 cache->mfc_un.res.minvif = vifi;
409 if (cache->mfc_un.res.maxvif <= vifi)
410 cache->mfc_un.res.maxvif = vifi + 1;
411 }
412 }
413}
414
415static int vif_add(struct vifctl *vifc, int mrtsock)
416{
417 int vifi = vifc->vifc_vifi;
418 struct vif_device *v = &vif_table[vifi];
419 struct net_device *dev;
420 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700421 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
423 /* Is vif busy ? */
424 if (VIF_EXISTS(vifi))
425 return -EADDRINUSE;
426
427 switch (vifc->vifc_flags) {
428#ifdef CONFIG_IP_PIMSM
429 case VIFF_REGISTER:
430 /*
431 * Special Purpose VIF in PIM
432 * All the packets will be sent to the daemon
433 */
434 if (reg_vif_num >= 0)
435 return -EADDRINUSE;
436 dev = ipmr_reg_vif();
437 if (!dev)
438 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700439 err = dev_set_allmulti(dev, 1);
440 if (err) {
441 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700442 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700443 return err;
444 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 break;
446#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900447 case VIFF_TUNNEL:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 dev = ipmr_new_tunnel(vifc);
449 if (!dev)
450 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700451 err = dev_set_allmulti(dev, 1);
452 if (err) {
453 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700454 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700455 return err;
456 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 break;
458 case 0:
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800459 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 if (!dev)
461 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700462 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700463 if (err) {
464 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700465 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700466 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 break;
468 default:
469 return -EINVAL;
470 }
471
Herbert Xue5ed6392005-10-03 14:35:55 -0700472 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700474 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 ip_rt_multicast_event(in_dev);
476
477 /*
478 * Fill in the VIF structures
479 */
480 v->rate_limit=vifc->vifc_rate_limit;
481 v->local=vifc->vifc_lcl_addr.s_addr;
482 v->remote=vifc->vifc_rmt_addr.s_addr;
483 v->flags=vifc->vifc_flags;
484 if (!mrtsock)
485 v->flags |= VIFF_STATIC;
486 v->threshold=vifc->vifc_threshold;
487 v->bytes_in = 0;
488 v->bytes_out = 0;
489 v->pkt_in = 0;
490 v->pkt_out = 0;
491 v->link = dev->ifindex;
492 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
493 v->link = dev->iflink;
494
495 /* And finish update writing critical data */
496 write_lock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 v->dev=dev;
498#ifdef CONFIG_IP_PIMSM
499 if (v->flags&VIFF_REGISTER)
500 reg_vif_num = vifi;
501#endif
502 if (vifi+1 > maxvif)
503 maxvif = vifi+1;
504 write_unlock_bh(&mrt_lock);
505 return 0;
506}
507
Al Viro114c7842006-09-27 18:39:29 -0700508static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509{
510 int line=MFC_HASH(mcastgrp,origin);
511 struct mfc_cache *c;
512
513 for (c=mfc_cache_array[line]; c; c = c->next) {
514 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
515 break;
516 }
517 return c;
518}
519
520/*
521 * Allocate a multicast cache entry
522 */
523static struct mfc_cache *ipmr_cache_alloc(void)
524{
Robert P. J. Dayc3762222007-02-10 01:45:03 -0800525 struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800526 if (c==NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 c->mfc_un.res.minvif = MAXVIFS;
529 return c;
530}
531
532static struct mfc_cache *ipmr_cache_alloc_unres(void)
533{
Robert P. J. Dayc3762222007-02-10 01:45:03 -0800534 struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800535 if (c==NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 skb_queue_head_init(&c->mfc_un.unres.unresolved);
538 c->mfc_un.unres.expires = jiffies + 10*HZ;
539 return c;
540}
541
542/*
543 * A cache entry has gone into a resolved state from queued
544 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900545
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
547{
548 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700549 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550
551 /*
552 * Play the pending entries through our router
553 */
554
Stephen Hemminger132adf52007-03-08 20:44:43 -0800555 while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700556 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
558
559 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700560 nlh->nlmsg_len = (skb_tail_pointer(skb) -
561 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 } else {
563 nlh->nlmsg_type = NLMSG_ERROR;
564 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
565 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700566 e = NLMSG_DATA(nlh);
567 e->error = -EMSGSIZE;
568 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 }
Thomas Graf2942e902006-08-15 00:30:25 -0700570
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800571 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 } else
573 ip_mr_forward(skb, c, 0);
574 }
575}
576
577/*
578 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
579 * expects the following bizarre scheme.
580 *
581 * Called under mrt_lock.
582 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900583
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
585{
586 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300587 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 struct igmphdr *igmp;
589 struct igmpmsg *msg;
590 int ret;
591
592#ifdef CONFIG_IP_PIMSM
593 if (assert == IGMPMSG_WHOLEPKT)
594 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
595 else
596#endif
597 skb = alloc_skb(128, GFP_ATOMIC);
598
Stephen Hemminger132adf52007-03-08 20:44:43 -0800599 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 return -ENOBUFS;
601
602#ifdef CONFIG_IP_PIMSM
603 if (assert == IGMPMSG_WHOLEPKT) {
604 /* Ugly, but we have no choice with this interface.
605 Duplicate old header, fix ihl, length etc.
606 And all this only to mangle msg->im_msgtype and
607 to set msg->im_mbz to "mbz" :-)
608 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300609 skb_push(skb, sizeof(struct iphdr));
610 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300611 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300612 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700613 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 msg->im_msgtype = IGMPMSG_WHOLEPKT;
615 msg->im_mbz = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900616 msg->im_vif = reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700617 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
618 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
619 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900620 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900622 {
623
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 /*
625 * Copy the IP header
626 */
627
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700628 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300629 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300630 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700631 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
632 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 msg->im_vif = vifi;
634 skb->dst = dst_clone(pkt->dst);
635
636 /*
637 * Add our header
638 */
639
640 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
641 igmp->type =
642 msg->im_msgtype = assert;
643 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700644 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700645 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900646 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648 if (mroute_socket == NULL) {
649 kfree_skb(skb);
650 return -EINVAL;
651 }
652
653 /*
654 * Deliver to mrouted
655 */
656 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
657 if (net_ratelimit())
658 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
659 kfree_skb(skb);
660 }
661
662 return ret;
663}
664
665/*
666 * Queue a packet for resolution. It gets locked cache entry!
667 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900668
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669static int
670ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
671{
672 int err;
673 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700674 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675
676 spin_lock_bh(&mfc_unres_lock);
677 for (c=mfc_unres_queue; c; c=c->next) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700678 if (c->mfc_mcastgrp == iph->daddr &&
679 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 break;
681 }
682
683 if (c == NULL) {
684 /*
685 * Create a new entry if allowable
686 */
687
688 if (atomic_read(&cache_resolve_queue_len)>=10 ||
689 (c=ipmr_cache_alloc_unres())==NULL) {
690 spin_unlock_bh(&mfc_unres_lock);
691
692 kfree_skb(skb);
693 return -ENOBUFS;
694 }
695
696 /*
697 * Fill in the new cache entry
698 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700699 c->mfc_parent = -1;
700 c->mfc_origin = iph->saddr;
701 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
703 /*
704 * Reflect first query at mrouted.
705 */
706 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900707 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 out - Brad Parker
709 */
710 spin_unlock_bh(&mfc_unres_lock);
711
712 kmem_cache_free(mrt_cachep, c);
713 kfree_skb(skb);
714 return err;
715 }
716
717 atomic_inc(&cache_resolve_queue_len);
718 c->next = mfc_unres_queue;
719 mfc_unres_queue = c;
720
721 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
722 }
723
724 /*
725 * See if we can append the packet
726 */
727 if (c->mfc_un.unres.unresolved.qlen>3) {
728 kfree_skb(skb);
729 err = -ENOBUFS;
730 } else {
731 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
732 err = 0;
733 }
734
735 spin_unlock_bh(&mfc_unres_lock);
736 return err;
737}
738
739/*
740 * MFC cache manipulation by user space mroute daemon
741 */
742
743static int ipmr_mfc_delete(struct mfcctl *mfc)
744{
745 int line;
746 struct mfc_cache *c, **cp;
747
748 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
749
750 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
751 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
752 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
753 write_lock_bh(&mrt_lock);
754 *cp = c->next;
755 write_unlock_bh(&mrt_lock);
756
757 kmem_cache_free(mrt_cachep, c);
758 return 0;
759 }
760 }
761 return -ENOENT;
762}
763
764static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
765{
766 int line;
767 struct mfc_cache *uc, *c, **cp;
768
769 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
770
771 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
772 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
773 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
774 break;
775 }
776
777 if (c != NULL) {
778 write_lock_bh(&mrt_lock);
779 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700780 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 if (!mrtsock)
782 c->mfc_flags |= MFC_STATIC;
783 write_unlock_bh(&mrt_lock);
784 return 0;
785 }
786
Joe Perchesf97c1e02007-12-16 13:45:43 -0800787 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 return -EINVAL;
789
790 c=ipmr_cache_alloc();
791 if (c==NULL)
792 return -ENOMEM;
793
794 c->mfc_origin=mfc->mfcc_origin.s_addr;
795 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
796 c->mfc_parent=mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700797 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 if (!mrtsock)
799 c->mfc_flags |= MFC_STATIC;
800
801 write_lock_bh(&mrt_lock);
802 c->next = mfc_cache_array[line];
803 mfc_cache_array[line] = c;
804 write_unlock_bh(&mrt_lock);
805
806 /*
807 * Check to see if we resolved a queued list. If so we
808 * need to send on the frames and tidy up.
809 */
810 spin_lock_bh(&mfc_unres_lock);
811 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
812 cp = &uc->next) {
813 if (uc->mfc_origin == c->mfc_origin &&
814 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
815 *cp = uc->next;
816 if (atomic_dec_and_test(&cache_resolve_queue_len))
817 del_timer(&ipmr_expire_timer);
818 break;
819 }
820 }
821 spin_unlock_bh(&mfc_unres_lock);
822
823 if (uc) {
824 ipmr_cache_resolve(uc, c);
825 kmem_cache_free(mrt_cachep, uc);
826 }
827 return 0;
828}
829
830/*
831 * Close the multicast socket, and clear the vif tables etc
832 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900833
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834static void mroute_clean_tables(struct sock *sk)
835{
836 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900837
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 /*
839 * Shut down all active vif entries
840 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800841 for (i=0; i<maxvif; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 if (!(vif_table[i].flags&VIFF_STATIC))
Wang Chen7dc00c82008-07-14 20:56:34 -0700843 vif_delete(i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 }
845
846 /*
847 * Wipe the cache
848 */
849 for (i=0;i<MFC_LINES;i++) {
850 struct mfc_cache *c, **cp;
851
852 cp = &mfc_cache_array[i];
853 while ((c = *cp) != NULL) {
854 if (c->mfc_flags&MFC_STATIC) {
855 cp = &c->next;
856 continue;
857 }
858 write_lock_bh(&mrt_lock);
859 *cp = c->next;
860 write_unlock_bh(&mrt_lock);
861
862 kmem_cache_free(mrt_cachep, c);
863 }
864 }
865
866 if (atomic_read(&cache_resolve_queue_len) != 0) {
867 struct mfc_cache *c;
868
869 spin_lock_bh(&mfc_unres_lock);
870 while (mfc_unres_queue != NULL) {
871 c = mfc_unres_queue;
872 mfc_unres_queue = c->next;
873 spin_unlock_bh(&mfc_unres_lock);
874
875 ipmr_destroy_unres(c);
876
877 spin_lock_bh(&mfc_unres_lock);
878 }
879 spin_unlock_bh(&mfc_unres_lock);
880 }
881}
882
883static void mrtsock_destruct(struct sock *sk)
884{
885 rtnl_lock();
886 if (sk == mroute_socket) {
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900887 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888
889 write_lock_bh(&mrt_lock);
890 mroute_socket=NULL;
891 write_unlock_bh(&mrt_lock);
892
893 mroute_clean_tables(sk);
894 }
895 rtnl_unlock();
896}
897
898/*
899 * Socket options and virtual interface manipulation. The whole
900 * virtual interface system is a complete heap, but unfortunately
901 * that's how BSD mrouted happens to think. Maybe one day with a proper
902 * MOSPF/PIM router set up we can clean this up.
903 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900904
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
906{
907 int ret;
908 struct vifctl vif;
909 struct mfcctl mfc;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900910
Stephen Hemminger132adf52007-03-08 20:44:43 -0800911 if (optname != MRT_INIT) {
912 if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 return -EACCES;
914 }
915
Stephen Hemminger132adf52007-03-08 20:44:43 -0800916 switch (optname) {
917 case MRT_INIT:
918 if (sk->sk_type != SOCK_RAW ||
919 inet_sk(sk)->num != IPPROTO_IGMP)
920 return -EOPNOTSUPP;
921 if (optlen!=sizeof(int))
922 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Stephen Hemminger132adf52007-03-08 20:44:43 -0800924 rtnl_lock();
925 if (mroute_socket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800927 return -EADDRINUSE;
928 }
929
930 ret = ip_ra_control(sk, 1, mrtsock_destruct);
931 if (ret == 0) {
932 write_lock_bh(&mrt_lock);
933 mroute_socket=sk;
934 write_unlock_bh(&mrt_lock);
935
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900936 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800937 }
938 rtnl_unlock();
939 return ret;
940 case MRT_DONE:
941 if (sk!=mroute_socket)
942 return -EACCES;
943 return ip_ra_control(sk, 0, NULL);
944 case MRT_ADD_VIF:
945 case MRT_DEL_VIF:
946 if (optlen!=sizeof(vif))
947 return -EINVAL;
948 if (copy_from_user(&vif,optval,sizeof(vif)))
949 return -EFAULT;
950 if (vif.vifc_vifi >= MAXVIFS)
951 return -ENFILE;
952 rtnl_lock();
953 if (optname==MRT_ADD_VIF) {
954 ret = vif_add(&vif, sk==mroute_socket);
955 } else {
Wang Chen7dc00c82008-07-14 20:56:34 -0700956 ret = vif_delete(vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800957 }
958 rtnl_unlock();
959 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960
961 /*
962 * Manipulate the forwarding caches. These live
963 * in a sort of kernel/user symbiosis.
964 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800965 case MRT_ADD_MFC:
966 case MRT_DEL_MFC:
967 if (optlen!=sizeof(mfc))
968 return -EINVAL;
969 if (copy_from_user(&mfc,optval, sizeof(mfc)))
970 return -EFAULT;
971 rtnl_lock();
972 if (optname==MRT_DEL_MFC)
973 ret = ipmr_mfc_delete(&mfc);
974 else
975 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
976 rtnl_unlock();
977 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 /*
979 * Control PIM assert.
980 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800981 case MRT_ASSERT:
982 {
983 int v;
984 if (get_user(v,(int __user *)optval))
985 return -EFAULT;
986 mroute_do_assert=(v)?1:0;
987 return 0;
988 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -0800990 case MRT_PIM:
991 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800992 int v;
993
Stephen Hemminger132adf52007-03-08 20:44:43 -0800994 if (get_user(v,(int __user *)optval))
995 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800996 v = (v) ? 1 : 0;
997
Stephen Hemminger132adf52007-03-08 20:44:43 -0800998 rtnl_lock();
999 ret = 0;
1000 if (v != mroute_do_pim) {
1001 mroute_do_pim = v;
1002 mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003#ifdef CONFIG_IP_PIMSM_V2
Stephen Hemminger132adf52007-03-08 20:44:43 -08001004 if (mroute_do_pim)
1005 ret = inet_add_protocol(&pim_protocol,
1006 IPPROTO_PIM);
1007 else
1008 ret = inet_del_protocol(&pim_protocol,
1009 IPPROTO_PIM);
1010 if (ret < 0)
1011 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001014 rtnl_unlock();
1015 return ret;
1016 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001018 /*
1019 * Spurious command, or MRT_VERSION which you cannot
1020 * set.
1021 */
1022 default:
1023 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 }
1025}
1026
1027/*
1028 * Getsock opt support for the multicast routing system.
1029 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001030
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
1032{
1033 int olr;
1034 int val;
1035
Stephen Hemminger132adf52007-03-08 20:44:43 -08001036 if (optname!=MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037#ifdef CONFIG_IP_PIMSM
1038 optname!=MRT_PIM &&
1039#endif
1040 optname!=MRT_ASSERT)
1041 return -ENOPROTOOPT;
1042
1043 if (get_user(olr, optlen))
1044 return -EFAULT;
1045
1046 olr = min_t(unsigned int, olr, sizeof(int));
1047 if (olr < 0)
1048 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001049
Stephen Hemminger132adf52007-03-08 20:44:43 -08001050 if (put_user(olr,optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001052 if (optname==MRT_VERSION)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 val=0x0305;
1054#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001055 else if (optname==MRT_PIM)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 val=mroute_do_pim;
1057#endif
1058 else
1059 val=mroute_do_assert;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001060 if (copy_to_user(optval,&val,olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 return -EFAULT;
1062 return 0;
1063}
1064
1065/*
1066 * The IP multicast ioctl support routines.
1067 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001068
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1070{
1071 struct sioc_sg_req sr;
1072 struct sioc_vif_req vr;
1073 struct vif_device *vif;
1074 struct mfc_cache *c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001075
Stephen Hemminger132adf52007-03-08 20:44:43 -08001076 switch (cmd) {
1077 case SIOCGETVIFCNT:
1078 if (copy_from_user(&vr,arg,sizeof(vr)))
1079 return -EFAULT;
1080 if (vr.vifi>=maxvif)
1081 return -EINVAL;
1082 read_lock(&mrt_lock);
1083 vif=&vif_table[vr.vifi];
1084 if (VIF_EXISTS(vr.vifi)) {
1085 vr.icount=vif->pkt_in;
1086 vr.ocount=vif->pkt_out;
1087 vr.ibytes=vif->bytes_in;
1088 vr.obytes=vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001090
1091 if (copy_to_user(arg,&vr,sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001093 return 0;
1094 }
1095 read_unlock(&mrt_lock);
1096 return -EADDRNOTAVAIL;
1097 case SIOCGETSGCNT:
1098 if (copy_from_user(&sr,arg,sizeof(sr)))
1099 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100
Stephen Hemminger132adf52007-03-08 20:44:43 -08001101 read_lock(&mrt_lock);
1102 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1103 if (c) {
1104 sr.pktcnt = c->mfc_un.res.pkt;
1105 sr.bytecnt = c->mfc_un.res.bytes;
1106 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001108
1109 if (copy_to_user(arg,&sr,sizeof(sr)))
1110 return -EFAULT;
1111 return 0;
1112 }
1113 read_unlock(&mrt_lock);
1114 return -EADDRNOTAVAIL;
1115 default:
1116 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 }
1118}
1119
1120
1121static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1122{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001123 struct net_device *dev = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 struct vif_device *v;
1125 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001126
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -07001127 if (!net_eq(dev_net(dev), &init_net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001128 return NOTIFY_DONE;
1129
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 if (event != NETDEV_UNREGISTER)
1131 return NOTIFY_DONE;
1132 v=&vif_table[0];
Stephen Hemminger132adf52007-03-08 20:44:43 -08001133 for (ct=0;ct<maxvif;ct++,v++) {
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001134 if (v->dev==dev)
Wang Chen7dc00c82008-07-14 20:56:34 -07001135 vif_delete(ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 }
1137 return NOTIFY_DONE;
1138}
1139
1140
1141static struct notifier_block ip_mr_notifier={
1142 .notifier_call = ipmr_device_event,
1143};
1144
1145/*
1146 * Encapsulate a packet by attaching a valid IPIP header to it.
1147 * This avoids tunnel drivers and other mess and gives us the speed so
1148 * important for multicast video.
1149 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001150
Al Viro114c7842006-09-27 18:39:29 -07001151static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001153 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001154 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001155
1156 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001157 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001158 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001159 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160
1161 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001162 iph->tos = old_iph->tos;
1163 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 iph->frag_off = 0;
1165 iph->daddr = daddr;
1166 iph->saddr = saddr;
1167 iph->protocol = IPPROTO_IPIP;
1168 iph->ihl = 5;
1169 iph->tot_len = htons(skb->len);
1170 ip_select_ident(iph, skb->dst, NULL);
1171 ip_send_check(iph);
1172
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1174 nf_reset(skb);
1175}
1176
1177static inline int ipmr_forward_finish(struct sk_buff *skb)
1178{
1179 struct ip_options * opt = &(IPCB(skb)->opt);
1180
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001181 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182
1183 if (unlikely(opt->optlen))
1184 ip_forward_options(skb);
1185
1186 return dst_output(skb);
1187}
1188
1189/*
1190 * Processing handlers for ipmr_forward
1191 */
1192
1193static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1194{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001195 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 struct vif_device *vif = &vif_table[vifi];
1197 struct net_device *dev;
1198 struct rtable *rt;
1199 int encap = 0;
1200
1201 if (vif->dev == NULL)
1202 goto out_free;
1203
1204#ifdef CONFIG_IP_PIMSM
1205 if (vif->flags & VIFF_REGISTER) {
1206 vif->pkt_out++;
1207 vif->bytes_out+=skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001208 vif->dev->stats.tx_bytes += skb->len;
1209 vif->dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1211 kfree_skb(skb);
1212 return;
1213 }
1214#endif
1215
1216 if (vif->flags&VIFF_TUNNEL) {
1217 struct flowi fl = { .oif = vif->link,
1218 .nl_u = { .ip4_u =
1219 { .daddr = vif->remote,
1220 .saddr = vif->local,
1221 .tos = RT_TOS(iph->tos) } },
1222 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001223 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 goto out_free;
1225 encap = sizeof(struct iphdr);
1226 } else {
1227 struct flowi fl = { .oif = vif->link,
1228 .nl_u = { .ip4_u =
1229 { .daddr = iph->daddr,
1230 .tos = RT_TOS(iph->tos) } },
1231 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001232 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 goto out_free;
1234 }
1235
1236 dev = rt->u.dst.dev;
1237
1238 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1239 /* Do not fragment multicasts. Alas, IPv4 does not
1240 allow to send ICMP, so that packets will disappear
1241 to blackhole.
1242 */
1243
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001244 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 ip_rt_put(rt);
1246 goto out_free;
1247 }
1248
1249 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1250
1251 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001252 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 goto out_free;
1254 }
1255
1256 vif->pkt_out++;
1257 vif->bytes_out+=skb->len;
1258
1259 dst_release(skb->dst);
1260 skb->dst = &rt->u.dst;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001261 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
1263 /* FIXME: forward and output firewalls used to be called here.
1264 * What do we do with netfilter? -- RR */
1265 if (vif->flags & VIFF_TUNNEL) {
1266 ip_encap(skb, vif->local, vif->remote);
1267 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001268 vif->dev->stats.tx_packets++;
1269 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 }
1271
1272 IPCB(skb)->flags |= IPSKB_FORWARDED;
1273
1274 /*
1275 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1276 * not only before forwarding, but after forwarding on all output
1277 * interfaces. It is clear, if mrouter runs a multicasting
1278 * program, it should receive packets not depending to what interface
1279 * program is joined.
1280 * If we will not make it, the program will have to join on all
1281 * interfaces. On the other hand, multihoming host (or router, but
1282 * not mrouter) cannot join to more than one interface - it will
1283 * result in receiving multiple packets.
1284 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001285 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 ipmr_forward_finish);
1287 return;
1288
1289out_free:
1290 kfree_skb(skb);
1291 return;
1292}
1293
1294static int ipmr_find_vif(struct net_device *dev)
1295{
1296 int ct;
1297 for (ct=maxvif-1; ct>=0; ct--) {
1298 if (vif_table[ct].dev == dev)
1299 break;
1300 }
1301 return ct;
1302}
1303
1304/* "local" means that we should preserve one skb (for local delivery) */
1305
1306static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1307{
1308 int psend = -1;
1309 int vif, ct;
1310
1311 vif = cache->mfc_parent;
1312 cache->mfc_un.res.pkt++;
1313 cache->mfc_un.res.bytes += skb->len;
1314
1315 /*
1316 * Wrong interface: drop packet and (maybe) send PIM assert.
1317 */
1318 if (vif_table[vif].dev != skb->dev) {
1319 int true_vifi;
1320
Eric Dumazetee6b9672008-03-05 18:30:47 -08001321 if (skb->rtable->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 /* It is our own packet, looped back.
1323 Very complicated situation...
1324
1325 The best workaround until routing daemons will be
1326 fixed is not to redistribute packet, if it was
1327 send through wrong interface. It means, that
1328 multicast applications WILL NOT work for
1329 (S,G), which have default multicast route pointing
1330 to wrong oif. In any case, it is not a good
1331 idea to use multicasting applications on router.
1332 */
1333 goto dont_forward;
1334 }
1335
1336 cache->mfc_un.res.wrong_if++;
1337 true_vifi = ipmr_find_vif(skb->dev);
1338
1339 if (true_vifi >= 0 && mroute_do_assert &&
1340 /* pimsm uses asserts, when switching from RPT to SPT,
1341 so that we cannot check that packet arrived on an oif.
1342 It is bad, but otherwise we would need to move pretty
1343 large chunk of pimd to kernel. Ough... --ANK
1344 */
1345 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001346 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1348 cache->mfc_un.res.last_assert = jiffies;
1349 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1350 }
1351 goto dont_forward;
1352 }
1353
1354 vif_table[vif].pkt_in++;
1355 vif_table[vif].bytes_in+=skb->len;
1356
1357 /*
1358 * Forward the frame
1359 */
1360 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001361 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 if (psend != -1) {
1363 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1364 if (skb2)
1365 ipmr_queue_xmit(skb2, cache, psend);
1366 }
1367 psend=ct;
1368 }
1369 }
1370 if (psend != -1) {
1371 if (local) {
1372 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1373 if (skb2)
1374 ipmr_queue_xmit(skb2, cache, psend);
1375 } else {
1376 ipmr_queue_xmit(skb, cache, psend);
1377 return 0;
1378 }
1379 }
1380
1381dont_forward:
1382 if (!local)
1383 kfree_skb(skb);
1384 return 0;
1385}
1386
1387
1388/*
1389 * Multicast packets for forwarding arrive here
1390 */
1391
1392int ip_mr_input(struct sk_buff *skb)
1393{
1394 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001395 int local = skb->rtable->rt_flags&RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396
1397 /* Packet is looped back after forward, it should not be
1398 forwarded second time, but still can be delivered locally.
1399 */
1400 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1401 goto dont_forward;
1402
1403 if (!local) {
1404 if (IPCB(skb)->opt.router_alert) {
1405 if (ip_call_ra_chain(skb))
1406 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001407 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 /* IGMPv1 (and broken IGMPv2 implementations sort of
1409 Cisco IOS <= 11.2(8)) do not put router alert
1410 option to IGMP packets destined to routable
1411 groups. It is very bad, because it means
1412 that we can forward NO IGMP messages.
1413 */
1414 read_lock(&mrt_lock);
1415 if (mroute_socket) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001416 nf_reset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 raw_rcv(mroute_socket, skb);
1418 read_unlock(&mrt_lock);
1419 return 0;
1420 }
1421 read_unlock(&mrt_lock);
1422 }
1423 }
1424
1425 read_lock(&mrt_lock);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001426 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 /*
1429 * No usable cache entry
1430 */
1431 if (cache==NULL) {
1432 int vif;
1433
1434 if (local) {
1435 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1436 ip_local_deliver(skb);
1437 if (skb2 == NULL) {
1438 read_unlock(&mrt_lock);
1439 return -ENOBUFS;
1440 }
1441 skb = skb2;
1442 }
1443
1444 vif = ipmr_find_vif(skb->dev);
1445 if (vif >= 0) {
1446 int err = ipmr_cache_unresolved(vif, skb);
1447 read_unlock(&mrt_lock);
1448
1449 return err;
1450 }
1451 read_unlock(&mrt_lock);
1452 kfree_skb(skb);
1453 return -ENODEV;
1454 }
1455
1456 ip_mr_forward(skb, cache, local);
1457
1458 read_unlock(&mrt_lock);
1459
1460 if (local)
1461 return ip_local_deliver(skb);
1462
1463 return 0;
1464
1465dont_forward:
1466 if (local)
1467 return ip_local_deliver(skb);
1468 kfree_skb(skb);
1469 return 0;
1470}
1471
1472#ifdef CONFIG_IP_PIMSM_V1
1473/*
1474 * Handle IGMP messages of PIMv1
1475 */
1476
1477int pim_rcv_v1(struct sk_buff * skb)
1478{
1479 struct igmphdr *pim;
1480 struct iphdr *encap;
1481 struct net_device *reg_dev = NULL;
1482
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001483 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 goto drop;
1485
Arnaldo Carvalho de Melod9edf9e2007-03-13 14:19:23 -03001486 pim = igmp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001488 if (!mroute_do_pim ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 skb->len < sizeof(*pim) + sizeof(*encap) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001490 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 goto drop;
1492
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001493 encap = (struct iphdr *)(skb_transport_header(skb) +
1494 sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 /*
1496 Check that:
1497 a. packet is really destinted to a multicast group
1498 b. packet is not a NULL-REGISTER
1499 c. packet is not truncated
1500 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001501 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 encap->tot_len == 0 ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001503 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 goto drop;
1505
1506 read_lock(&mrt_lock);
1507 if (reg_vif_num >= 0)
1508 reg_dev = vif_table[reg_vif_num].dev;
1509 if (reg_dev)
1510 dev_hold(reg_dev);
1511 read_unlock(&mrt_lock);
1512
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001513 if (reg_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 goto drop;
1515
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001516 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001518 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 skb->protocol = htons(ETH_P_IP);
1521 skb->ip_summed = 0;
1522 skb->pkt_type = PACKET_HOST;
1523 dst_release(skb->dst);
1524 skb->dst = NULL;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001525 reg_dev->stats.rx_bytes += skb->len;
1526 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 nf_reset(skb);
1528 netif_rx(skb);
1529 dev_put(reg_dev);
1530 return 0;
1531 drop:
1532 kfree_skb(skb);
1533 return 0;
1534}
1535#endif
1536
1537#ifdef CONFIG_IP_PIMSM_V2
1538static int pim_rcv(struct sk_buff * skb)
1539{
1540 struct pimreghdr *pim;
1541 struct iphdr *encap;
1542 struct net_device *reg_dev = NULL;
1543
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001544 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 goto drop;
1546
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001547 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001548 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001550 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001551 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 goto drop;
1553
1554 /* check if the inner packet is destined to mcast group */
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001555 encap = (struct iphdr *)(skb_transport_header(skb) +
1556 sizeof(struct pimreghdr));
Joe Perchesf97c1e02007-12-16 13:45:43 -08001557 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 encap->tot_len == 0 ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001559 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 goto drop;
1561
1562 read_lock(&mrt_lock);
1563 if (reg_vif_num >= 0)
1564 reg_dev = vif_table[reg_vif_num].dev;
1565 if (reg_dev)
1566 dev_hold(reg_dev);
1567 read_unlock(&mrt_lock);
1568
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001569 if (reg_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 goto drop;
1571
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001572 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001574 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 skb->protocol = htons(ETH_P_IP);
1577 skb->ip_summed = 0;
1578 skb->pkt_type = PACKET_HOST;
1579 dst_release(skb->dst);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001580 reg_dev->stats.rx_bytes += skb->len;
1581 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 skb->dst = NULL;
1583 nf_reset(skb);
1584 netif_rx(skb);
1585 dev_put(reg_dev);
1586 return 0;
1587 drop:
1588 kfree_skb(skb);
1589 return 0;
1590}
1591#endif
1592
1593static int
1594ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1595{
1596 int ct;
1597 struct rtnexthop *nhp;
1598 struct net_device *dev = vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001599 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 struct rtattr *mp_head;
1601
1602 if (dev)
1603 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1604
1605 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1606
1607 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1608 if (c->mfc_un.res.ttls[ct] < 255) {
1609 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1610 goto rtattr_failure;
1611 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1612 nhp->rtnh_flags = 0;
1613 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1614 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1615 nhp->rtnh_len = sizeof(*nhp);
1616 }
1617 }
1618 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001619 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 rtm->rtm_type = RTN_MULTICAST;
1621 return 1;
1622
1623rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001624 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 return -EMSGSIZE;
1626}
1627
1628int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1629{
1630 int err;
1631 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001632 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633
1634 read_lock(&mrt_lock);
1635 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1636
1637 if (cache==NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001638 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001639 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 struct net_device *dev;
1641 int vif;
1642
1643 if (nowait) {
1644 read_unlock(&mrt_lock);
1645 return -EAGAIN;
1646 }
1647
1648 dev = skb->dev;
1649 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1650 read_unlock(&mrt_lock);
1651 return -ENODEV;
1652 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001653 skb2 = skb_clone(skb, GFP_ATOMIC);
1654 if (!skb2) {
1655 read_unlock(&mrt_lock);
1656 return -ENOMEM;
1657 }
1658
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001659 skb_push(skb2, sizeof(struct iphdr));
1660 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001661 iph = ip_hdr(skb2);
1662 iph->ihl = sizeof(struct iphdr) >> 2;
1663 iph->saddr = rt->rt_src;
1664 iph->daddr = rt->rt_dst;
1665 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001666 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 read_unlock(&mrt_lock);
1668 return err;
1669 }
1670
1671 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1672 cache->mfc_flags |= MFC_NOTIFY;
1673 err = ipmr_fill_mroute(skb, cache, rtm);
1674 read_unlock(&mrt_lock);
1675 return err;
1676}
1677
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001678#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679/*
1680 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1681 */
/* Per-open iterator state for the vif seq_file. */
struct ipmr_vif_iter {
	int ct;		/* index into vif_table of the current entry */
};
1685
1686static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1687 loff_t pos)
1688{
1689 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001690 if (!VIF_EXISTS(iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001692 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 return &vif_table[iter->ct];
1694 }
1695 return NULL;
1696}
1697
1698static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001699 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700{
1701 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001702 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703 : SEQ_START_TOKEN;
1704}
1705
1706static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1707{
1708 struct ipmr_vif_iter *iter = seq->private;
1709
1710 ++*pos;
1711 if (v == SEQ_START_TOKEN)
1712 return ipmr_vif_seq_idx(iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001713
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 while (++iter->ct < maxvif) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001715 if (!VIF_EXISTS(iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 continue;
1717 return &vif_table[iter->ct];
1718 }
1719 return NULL;
1720}
1721
1722static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001723 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724{
1725 read_unlock(&mrt_lock);
1726}
1727
1728static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1729{
1730 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001731 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1733 } else {
1734 const struct vif_device *vif = v;
1735 const char *name = vif->dev ? vif->dev->name : "none";
1736
1737 seq_printf(seq,
1738 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1739 vif - vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001740 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 vif->bytes_out, vif->pkt_out,
1742 vif->flags, vif->local, vif->remote);
1743 }
1744 return 0;
1745}
1746
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001747static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 .start = ipmr_vif_seq_start,
1749 .next = ipmr_vif_seq_next,
1750 .stop = ipmr_vif_seq_stop,
1751 .show = ipmr_vif_seq_show,
1752};
1753
1754static int ipmr_vif_open(struct inode *inode, struct file *file)
1755{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001756 return seq_open_private(file, &ipmr_vif_seq_ops,
1757 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758}
1759
Arjan van de Ven9a321442007-02-12 00:55:35 -08001760static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 .owner = THIS_MODULE,
1762 .open = ipmr_vif_open,
1763 .read = seq_read,
1764 .llseek = seq_lseek,
1765 .release = seq_release_private,
1766};
1767
/*
 * Per-open iterator state for the forwarding cache seq_file.
 * @cache tells which list is being walked - mfc_cache_array,
 * &mfc_unres_queue, or NULL for none - and thereby which lock is held.
 */
struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;		/* current hash line within mfc_cache_array */
};
1772
1773
1774static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1775{
1776 struct mfc_cache *mfc;
1777
1778 it->cache = mfc_cache_array;
1779 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001780 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001781 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001782 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 return mfc;
1784 read_unlock(&mrt_lock);
1785
1786 it->cache = &mfc_unres_queue;
1787 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001788 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 if (pos-- == 0)
1790 return mfc;
1791 spin_unlock_bh(&mfc_unres_lock);
1792
1793 it->cache = NULL;
1794 return NULL;
1795}
1796
1797
1798static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1799{
1800 struct ipmr_mfc_iter *it = seq->private;
1801 it->cache = NULL;
1802 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001803 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 : SEQ_START_TOKEN;
1805}
1806
1807static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808{
1809 struct mfc_cache *mfc = v;
1810 struct ipmr_mfc_iter *it = seq->private;
1811
1812 ++*pos;
1813
1814 if (v == SEQ_START_TOKEN)
1815 return ipmr_mfc_seq_idx(seq->private, 0);
1816
1817 if (mfc->next)
1818 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001819
1820 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 goto end_of_list;
1822
1823 BUG_ON(it->cache != mfc_cache_array);
1824
1825 while (++it->ct < MFC_LINES) {
1826 mfc = mfc_cache_array[it->ct];
1827 if (mfc)
1828 return mfc;
1829 }
1830
1831 /* exhausted cache_array, show unresolved */
1832 read_unlock(&mrt_lock);
1833 it->cache = &mfc_unres_queue;
1834 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001835
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 spin_lock_bh(&mfc_unres_lock);
1837 mfc = mfc_unres_queue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001838 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839 return mfc;
1840
1841 end_of_list:
1842 spin_unlock_bh(&mfc_unres_lock);
1843 it->cache = NULL;
1844
1845 return NULL;
1846}
1847
1848static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1849{
1850 struct ipmr_mfc_iter *it = seq->private;
1851
1852 if (it->cache == &mfc_unres_queue)
1853 spin_unlock_bh(&mfc_unres_lock);
1854 else if (it->cache == mfc_cache_array)
1855 read_unlock(&mrt_lock);
1856}
1857
1858static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1859{
1860 int n;
1861
1862 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001863 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1865 } else {
1866 const struct mfc_cache *mfc = v;
1867 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001868
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1870 (unsigned long) mfc->mfc_mcastgrp,
1871 (unsigned long) mfc->mfc_origin,
1872 mfc->mfc_parent,
1873 mfc->mfc_un.res.pkt,
1874 mfc->mfc_un.res.bytes,
1875 mfc->mfc_un.res.wrong_if);
1876
1877 if (it->cache != &mfc_unres_queue) {
Stephen Hemminger132adf52007-03-08 20:44:43 -08001878 for (n = mfc->mfc_un.res.minvif;
1879 n < mfc->mfc_un.res.maxvif; n++ ) {
1880 if (VIF_EXISTS(n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 && mfc->mfc_un.res.ttls[n] < 255)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001882 seq_printf(seq,
1883 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 n, mfc->mfc_un.res.ttls[n]);
1885 }
1886 }
1887 seq_putc(seq, '\n');
1888 }
1889 return 0;
1890}
1891
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001892static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 .start = ipmr_mfc_seq_start,
1894 .next = ipmr_mfc_seq_next,
1895 .stop = ipmr_mfc_seq_stop,
1896 .show = ipmr_mfc_seq_show,
1897};
1898
1899static int ipmr_mfc_open(struct inode *inode, struct file *file)
1900{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001901 return seq_open_private(file, &ipmr_mfc_seq_ops,
1902 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903}
1904
Arjan van de Ven9a321442007-02-12 00:55:35 -08001905static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 .owner = THIS_MODULE,
1907 .open = ipmr_mfc_open,
1908 .read = seq_read,
1909 .llseek = seq_lseek,
1910 .release = seq_release_private,
1911};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001912#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
1914#ifdef CONFIG_IP_PIMSM_V2
1915static struct net_protocol pim_protocol = {
1916 .handler = pim_rcv,
1917};
1918#endif
1919
1920
1921/*
1922 * Setup for IP multicast routing
1923 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001924
Wang Chen03d2f892008-07-03 12:13:36 +08001925int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926{
Wang Chen03d2f892008-07-03 12:13:36 +08001927 int err;
1928
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1930 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07001931 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09001932 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08001933 if (!mrt_cachep)
1934 return -ENOMEM;
1935
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08001936 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08001937 err = register_netdevice_notifier(&ip_mr_notifier);
1938 if (err)
1939 goto reg_notif_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001940#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001941 err = -ENOMEM;
1942 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1943 goto proc_vif_fail;
1944 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1945 goto proc_cache_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001946#endif
Wang Chen03d2f892008-07-03 12:13:36 +08001947 return 0;
1948reg_notif_fail:
1949 kmem_cache_destroy(mrt_cachep);
1950#ifdef CONFIG_PROC_FS
1951proc_vif_fail:
1952 unregister_netdevice_notifier(&ip_mr_notifier);
1953proc_cache_fail:
1954 proc_net_remove(&init_net, "ip_mr_vif");
1955#endif
1956 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957}