blob: feafd14eb7b946050b82ec4d2cebf2a26e712170 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
82static int mroute_do_assert; /* Set in PIM assert */
83static int mroute_do_pim;
84
Linus Torvalds1da177e2005-04-16 15:20:36 -070085static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87/* Special spinlock for queue of unresolved entries */
88static DEFINE_SPINLOCK(mfc_unres_lock);
89
90/* We return to original Alan's scheme. Hash table of resolved
91 entries is changed only in process context and protected
92 with weak lock mrt_lock. Queue of unresolved entries is protected
93 with strong spinlock mfc_unres_lock.
94
95 In this case data path is free of exclusive locks at all.
96 */
97
Christoph Lametere18b8902006-12-06 20:33:20 -080098static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
100static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
101static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
102static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
103
104#ifdef CONFIG_IP_PIMSM_V2
105static struct net_protocol pim_protocol;
106#endif
107
108static struct timer_list ipmr_expire_timer;
109
110/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
111
Wang Chend6070322008-07-14 20:55:26 -0700112static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
113{
114 dev_close(dev);
115
116 dev = __dev_get_by_name(&init_net, "tunl0");
117 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800118 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700119 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700120 struct ip_tunnel_parm p;
121
122 memset(&p, 0, sizeof(p));
123 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125 p.iph.version = 4;
126 p.iph.ihl = 5;
127 p.iph.protocol = IPPROTO_IPIP;
128 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800131 if (ops->ndo_do_ioctl) {
132 mm_segment_t oldfs = get_fs();
133
134 set_fs(KERNEL_DS);
135 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136 set_fs(oldfs);
137 }
Wang Chend6070322008-07-14 20:55:26 -0700138 }
139}
140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141static
142struct net_device *ipmr_new_tunnel(struct vifctl *v)
143{
144 struct net_device *dev;
145
Eric W. Biederman881d9662007-09-17 11:56:21 -0700146 dev = __dev_get_by_name(&init_net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
148 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800149 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 int err;
151 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 struct ip_tunnel_parm p;
153 struct in_device *in_dev;
154
155 memset(&p, 0, sizeof(p));
156 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158 p.iph.version = 4;
159 p.iph.ihl = 5;
160 p.iph.protocol = IPPROTO_IPIP;
161 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800162 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800164 if (ops->ndo_do_ioctl) {
165 mm_segment_t oldfs = get_fs();
166
167 set_fs(KERNEL_DS);
168 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169 set_fs(oldfs);
170 } else
171 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
173 dev = NULL;
174
Eric W. Biederman881d9662007-09-17 11:56:21 -0700175 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 dev->flags |= IFF_MULTICAST;
177
Herbert Xue5ed6392005-10-03 14:35:55 -0700178 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700179 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700181
182 ipv4_devconf_setall(in_dev);
183 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184
185 if (dev_open(dev))
186 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700187 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 }
189 }
190 return dev;
191
192failure:
193 /* allow the register to be completed before unregistering. */
194 rtnl_unlock();
195 rtnl_lock();
196
197 unregister_netdevice(dev);
198 return NULL;
199}
200
201#ifdef CONFIG_IP_PIMSM
202
203static int reg_vif_num = -1;
204
205static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
206{
207 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700208 dev->stats.tx_bytes += skb->len;
209 dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
211 read_unlock(&mrt_lock);
212 kfree_skb(skb);
213 return 0;
214}
215
Stephen Hemminger007c3832008-11-20 20:28:35 -0800216static const struct net_device_ops reg_vif_netdev_ops = {
217 .ndo_start_xmit = reg_vif_xmit,
218};
219
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220static void reg_vif_setup(struct net_device *dev)
221{
222 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800223 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800225 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 dev->destructor = free_netdev;
227}
228
229static struct net_device *ipmr_reg_vif(void)
230{
231 struct net_device *dev;
232 struct in_device *in_dev;
233
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700234 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235
236 if (dev == NULL)
237 return NULL;
238
239 if (register_netdevice(dev)) {
240 free_netdev(dev);
241 return NULL;
242 }
243 dev->iflink = 0;
244
Herbert Xu71e27da2007-06-04 23:36:06 -0700245 rcu_read_lock();
246 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
247 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700249 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250
Herbert Xu71e27da2007-06-04 23:36:06 -0700251 ipv4_devconf_setall(in_dev);
252 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
253 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254
255 if (dev_open(dev))
256 goto failure;
257
Wang Chen7dc00c82008-07-14 20:56:34 -0700258 dev_hold(dev);
259
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 return dev;
261
262failure:
263 /* allow the register to be completed before unregistering. */
264 rtnl_unlock();
265 rtnl_lock();
266
267 unregister_netdevice(dev);
268 return NULL;
269}
270#endif
271
272/*
273 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700274 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900276
Wang Chen7dc00c82008-07-14 20:56:34 -0700277static int vif_delete(int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278{
279 struct vif_device *v;
280 struct net_device *dev;
281 struct in_device *in_dev;
282
Benjamin Therycf958ae32009-01-22 04:56:16 +0000283 if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 return -EADDRNOTAVAIL;
285
Benjamin Therycf958ae32009-01-22 04:56:16 +0000286 v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287
288 write_lock_bh(&mrt_lock);
289 dev = v->dev;
290 v->dev = NULL;
291
292 if (!dev) {
293 write_unlock_bh(&mrt_lock);
294 return -EADDRNOTAVAIL;
295 }
296
297#ifdef CONFIG_IP_PIMSM
298 if (vifi == reg_vif_num)
299 reg_vif_num = -1;
300#endif
301
Benjamin Therycf958ae32009-01-22 04:56:16 +0000302 if (vifi+1 == init_net.ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 int tmp;
304 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Therycf958ae32009-01-22 04:56:16 +0000305 if (VIF_EXISTS(&init_net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 break;
307 }
Benjamin Therycf958ae32009-01-22 04:56:16 +0000308 init_net.ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 }
310
311 write_unlock_bh(&mrt_lock);
312
313 dev_set_allmulti(dev, -1);
314
Herbert Xue5ed6392005-10-03 14:35:55 -0700315 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700316 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 ip_rt_multicast_event(in_dev);
318 }
319
Wang Chen7dc00c82008-07-14 20:56:34 -0700320 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 unregister_netdevice(dev);
322
323 dev_put(dev);
324 return 0;
325}
326
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000327static inline void ipmr_cache_free(struct mfc_cache *c)
328{
329 release_net(mfc_net(c));
330 kmem_cache_free(mrt_cachep, c);
331}
332
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333/* Destroy an unresolved cache entry, killing queued skbs
334 and reporting error to netlink readers.
335 */
336
337static void ipmr_destroy_unres(struct mfc_cache *c)
338{
339 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700340 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000342 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343
Jianjun Kongc354e122008-11-03 00:28:02 -0800344 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700345 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
347 nlh->nlmsg_type = NLMSG_ERROR;
348 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
349 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700350 e = NLMSG_DATA(nlh);
351 e->error = -ETIMEDOUT;
352 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700353
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800354 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 } else
356 kfree_skb(skb);
357 }
358
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000359 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360}
361
362
363/* Single timer process for all the unresolved queue. */
364
365static void ipmr_expire_process(unsigned long dummy)
366{
367 unsigned long now;
368 unsigned long expires;
369 struct mfc_cache *c, **cp;
370
371 if (!spin_trylock(&mfc_unres_lock)) {
372 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
373 return;
374 }
375
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000376 if (mfc_unres_queue == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 goto out;
378
379 now = jiffies;
380 expires = 10*HZ;
381 cp = &mfc_unres_queue;
382
383 while ((c=*cp) != NULL) {
384 if (time_after(c->mfc_un.unres.expires, now)) {
385 unsigned long interval = c->mfc_un.unres.expires - now;
386 if (interval < expires)
387 expires = interval;
388 cp = &c->next;
389 continue;
390 }
391
392 *cp = c->next;
393
394 ipmr_destroy_unres(c);
395 }
396
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000397 if (mfc_unres_queue != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 mod_timer(&ipmr_expire_timer, jiffies + expires);
399
400out:
401 spin_unlock(&mfc_unres_lock);
402}
403
404/* Fill oifs list. It is called under write locked mrt_lock. */
405
Baruch Evend1b04c02005-07-30 17:41:59 -0700406static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407{
408 int vifi;
409
410 cache->mfc_un.res.minvif = MAXVIFS;
411 cache->mfc_un.res.maxvif = 0;
412 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
413
Benjamin Therycf958ae32009-01-22 04:56:16 +0000414 for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) {
415 if (VIF_EXISTS(&init_net, vifi) &&
416 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
418 if (cache->mfc_un.res.minvif > vifi)
419 cache->mfc_un.res.minvif = vifi;
420 if (cache->mfc_un.res.maxvif <= vifi)
421 cache->mfc_un.res.maxvif = vifi + 1;
422 }
423 }
424}
425
426static int vif_add(struct vifctl *vifc, int mrtsock)
427{
428 int vifi = vifc->vifc_vifi;
Benjamin Therycf958ae32009-01-22 04:56:16 +0000429 struct vif_device *v = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 struct net_device *dev;
431 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700432 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
434 /* Is vif busy ? */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000435 if (VIF_EXISTS(&init_net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 return -EADDRINUSE;
437
438 switch (vifc->vifc_flags) {
439#ifdef CONFIG_IP_PIMSM
440 case VIFF_REGISTER:
441 /*
442 * Special Purpose VIF in PIM
443 * All the packets will be sent to the daemon
444 */
445 if (reg_vif_num >= 0)
446 return -EADDRINUSE;
447 dev = ipmr_reg_vif();
448 if (!dev)
449 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700450 err = dev_set_allmulti(dev, 1);
451 if (err) {
452 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700453 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700454 return err;
455 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 break;
457#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900458 case VIFF_TUNNEL:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 dev = ipmr_new_tunnel(vifc);
460 if (!dev)
461 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700462 err = dev_set_allmulti(dev, 1);
463 if (err) {
464 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700465 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700466 return err;
467 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 break;
469 case 0:
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800470 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 if (!dev)
472 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700473 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700474 if (err) {
475 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700476 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700477 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 break;
479 default:
480 return -EINVAL;
481 }
482
Herbert Xue5ed6392005-10-03 14:35:55 -0700483 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700485 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 ip_rt_multicast_event(in_dev);
487
488 /*
489 * Fill in the VIF structures
490 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800491 v->rate_limit = vifc->vifc_rate_limit;
492 v->local = vifc->vifc_lcl_addr.s_addr;
493 v->remote = vifc->vifc_rmt_addr.s_addr;
494 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 if (!mrtsock)
496 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800497 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 v->bytes_in = 0;
499 v->bytes_out = 0;
500 v->pkt_in = 0;
501 v->pkt_out = 0;
502 v->link = dev->ifindex;
503 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
504 v->link = dev->iflink;
505
506 /* And finish update writing critical data */
507 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800508 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509#ifdef CONFIG_IP_PIMSM
510 if (v->flags&VIFF_REGISTER)
511 reg_vif_num = vifi;
512#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +0000513 if (vifi+1 > init_net.ipv4.maxvif)
514 init_net.ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 write_unlock_bh(&mrt_lock);
516 return 0;
517}
518
Al Viro114c7842006-09-27 18:39:29 -0700519static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520{
Jianjun Kongc354e122008-11-03 00:28:02 -0800521 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 struct mfc_cache *c;
523
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000524 for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
526 break;
527 }
528 return c;
529}
530
531/*
532 * Allocate a multicast cache entry
533 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000534static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535{
Jianjun Kongc354e122008-11-03 00:28:02 -0800536 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
537 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000540 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 return c;
542}
543
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000544static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545{
Jianjun Kongc354e122008-11-03 00:28:02 -0800546 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
547 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 skb_queue_head_init(&c->mfc_un.unres.unresolved);
550 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000551 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 return c;
553}
554
555/*
556 * A cache entry has gone into a resolved state from queued
557 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900558
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
560{
561 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700562 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
564 /*
565 * Play the pending entries through our router
566 */
567
Jianjun Kongc354e122008-11-03 00:28:02 -0800568 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700569 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
571
572 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700573 nlh->nlmsg_len = (skb_tail_pointer(skb) -
574 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 } else {
576 nlh->nlmsg_type = NLMSG_ERROR;
577 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
578 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700579 e = NLMSG_DATA(nlh);
580 e->error = -EMSGSIZE;
581 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 }
Thomas Graf2942e902006-08-15 00:30:25 -0700583
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800584 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 } else
586 ip_mr_forward(skb, c, 0);
587 }
588}
589
590/*
591 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
592 * expects the following bizarre scheme.
593 *
594 * Called under mrt_lock.
595 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900596
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
598{
599 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300600 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 struct igmphdr *igmp;
602 struct igmpmsg *msg;
603 int ret;
604
605#ifdef CONFIG_IP_PIMSM
606 if (assert == IGMPMSG_WHOLEPKT)
607 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
608 else
609#endif
610 skb = alloc_skb(128, GFP_ATOMIC);
611
Stephen Hemminger132adf52007-03-08 20:44:43 -0800612 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 return -ENOBUFS;
614
615#ifdef CONFIG_IP_PIMSM
616 if (assert == IGMPMSG_WHOLEPKT) {
617 /* Ugly, but we have no choice with this interface.
618 Duplicate old header, fix ihl, length etc.
619 And all this only to mangle msg->im_msgtype and
620 to set msg->im_mbz to "mbz" :-)
621 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300622 skb_push(skb, sizeof(struct iphdr));
623 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300624 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300625 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700626 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 msg->im_msgtype = IGMPMSG_WHOLEPKT;
628 msg->im_mbz = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900629 msg->im_vif = reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700630 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
631 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
632 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900633 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900635 {
636
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 /*
638 * Copy the IP header
639 */
640
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700641 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300642 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300643 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700644 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
645 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 msg->im_vif = vifi;
647 skb->dst = dst_clone(pkt->dst);
648
649 /*
650 * Add our header
651 */
652
Jianjun Kongc354e122008-11-03 00:28:02 -0800653 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 igmp->type =
655 msg->im_msgtype = assert;
656 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700657 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700658 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900659 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
Benjamin Thery70a269e2009-01-22 04:56:15 +0000661 if (init_net.ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 kfree_skb(skb);
663 return -EINVAL;
664 }
665
666 /*
667 * Deliver to mrouted
668 */
Benjamin Thery70a269e2009-01-22 04:56:15 +0000669 ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
670 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 if (net_ratelimit())
672 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
673 kfree_skb(skb);
674 }
675
676 return ret;
677}
678
679/*
680 * Queue a packet for resolution. It gets locked cache entry!
681 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900682
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683static int
684ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
685{
686 int err;
687 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700688 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
690 spin_lock_bh(&mfc_unres_lock);
691 for (c=mfc_unres_queue; c; c=c->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000692 if (net_eq(mfc_net(c), &init_net) &&
693 c->mfc_mcastgrp == iph->daddr &&
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700694 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 break;
696 }
697
698 if (c == NULL) {
699 /*
700 * Create a new entry if allowable
701 */
702
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000703 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000704 (c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 spin_unlock_bh(&mfc_unres_lock);
706
707 kfree_skb(skb);
708 return -ENOBUFS;
709 }
710
711 /*
712 * Fill in the new cache entry
713 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700714 c->mfc_parent = -1;
715 c->mfc_origin = iph->saddr;
716 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 /*
719 * Reflect first query at mrouted.
720 */
721 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900722 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 out - Brad Parker
724 */
725 spin_unlock_bh(&mfc_unres_lock);
726
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000727 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 kfree_skb(skb);
729 return err;
730 }
731
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000732 atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 c->next = mfc_unres_queue;
734 mfc_unres_queue = c;
735
736 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
737 }
738
739 /*
740 * See if we can append the packet
741 */
742 if (c->mfc_un.unres.unresolved.qlen>3) {
743 kfree_skb(skb);
744 err = -ENOBUFS;
745 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800746 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 err = 0;
748 }
749
750 spin_unlock_bh(&mfc_unres_lock);
751 return err;
752}
753
754/*
755 * MFC cache manipulation by user space mroute daemon
756 */
757
758static int ipmr_mfc_delete(struct mfcctl *mfc)
759{
760 int line;
761 struct mfc_cache *c, **cp;
762
Jianjun Kongc354e122008-11-03 00:28:02 -0800763 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000765 for (cp = &init_net.ipv4.mfc_cache_array[line];
766 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
768 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
769 write_lock_bh(&mrt_lock);
770 *cp = c->next;
771 write_unlock_bh(&mrt_lock);
772
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000773 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 return 0;
775 }
776 }
777 return -ENOENT;
778}
779
780static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
781{
782 int line;
783 struct mfc_cache *uc, *c, **cp;
784
Jianjun Kongc354e122008-11-03 00:28:02 -0800785 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000787 for (cp = &init_net.ipv4.mfc_cache_array[line];
788 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
790 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
791 break;
792 }
793
794 if (c != NULL) {
795 write_lock_bh(&mrt_lock);
796 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700797 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 if (!mrtsock)
799 c->mfc_flags |= MFC_STATIC;
800 write_unlock_bh(&mrt_lock);
801 return 0;
802 }
803
Joe Perchesf97c1e02007-12-16 13:45:43 -0800804 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 return -EINVAL;
806
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000807 c = ipmr_cache_alloc(&init_net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 return -ENOMEM;
810
Jianjun Kongc354e122008-11-03 00:28:02 -0800811 c->mfc_origin = mfc->mfcc_origin.s_addr;
812 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
813 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700814 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 if (!mrtsock)
816 c->mfc_flags |= MFC_STATIC;
817
818 write_lock_bh(&mrt_lock);
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000819 c->next = init_net.ipv4.mfc_cache_array[line];
820 init_net.ipv4.mfc_cache_array[line] = c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 write_unlock_bh(&mrt_lock);
822
823 /*
824 * Check to see if we resolved a queued list. If so we
825 * need to send on the frames and tidy up.
826 */
827 spin_lock_bh(&mfc_unres_lock);
828 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
829 cp = &uc->next) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000830 if (net_eq(mfc_net(uc), &init_net) &&
831 uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
833 *cp = uc->next;
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000834 atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 break;
836 }
837 }
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000838 if (mfc_unres_queue == NULL)
839 del_timer(&ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 spin_unlock_bh(&mfc_unres_lock);
841
842 if (uc) {
843 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000844 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 }
846 return 0;
847}
848
849/*
850 * Close the multicast socket, and clear the vif tables etc
851 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900852
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853static void mroute_clean_tables(struct sock *sk)
854{
855 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900856
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 /*
858 * Shut down all active vif entries
859 */
Benjamin Therycf958ae32009-01-22 04:56:16 +0000860 for (i = 0; i < init_net.ipv4.maxvif; i++) {
861 if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
Wang Chen7dc00c82008-07-14 20:56:34 -0700862 vif_delete(i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 }
864
865 /*
866 * Wipe the cache
867 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800868 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 struct mfc_cache *c, **cp;
870
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000871 cp = &init_net.ipv4.mfc_cache_array[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 while ((c = *cp) != NULL) {
873 if (c->mfc_flags&MFC_STATIC) {
874 cp = &c->next;
875 continue;
876 }
877 write_lock_bh(&mrt_lock);
878 *cp = c->next;
879 write_unlock_bh(&mrt_lock);
880
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000881 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 }
883 }
884
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000885 if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
886 struct mfc_cache *c, **cp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887
888 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000889 cp = &mfc_unres_queue;
890 while ((c = *cp) != NULL) {
891 if (!net_eq(mfc_net(c), &init_net)) {
892 cp = &c->next;
893 continue;
894 }
895 *cp = c->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896
897 ipmr_destroy_unres(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 }
899 spin_unlock_bh(&mfc_unres_lock);
900 }
901}
902
903static void mrtsock_destruct(struct sock *sk)
904{
905 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000906 if (sk == init_net.ipv4.mroute_sk) {
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900907 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908
909 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000910 init_net.ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 write_unlock_bh(&mrt_lock);
912
913 mroute_clean_tables(sk);
914 }
915 rtnl_unlock();
916}
917
918/*
919 * Socket options and virtual interface manipulation. The whole
920 * virtual interface system is a complete heap, but unfortunately
921 * that's how BSD mrouted happens to think. Maybe one day with a proper
922 * MOSPF/PIM router set up we can clean this up.
923 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900924
Jianjun Kongc354e122008-11-03 00:28:02 -0800925int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926{
927 int ret;
928 struct vifctl vif;
929 struct mfcctl mfc;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900930
Stephen Hemminger132adf52007-03-08 20:44:43 -0800931 if (optname != MRT_INIT) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000932 if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 return -EACCES;
934 }
935
Stephen Hemminger132adf52007-03-08 20:44:43 -0800936 switch (optname) {
937 case MRT_INIT:
938 if (sk->sk_type != SOCK_RAW ||
939 inet_sk(sk)->num != IPPROTO_IGMP)
940 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800941 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800942 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
Stephen Hemminger132adf52007-03-08 20:44:43 -0800944 rtnl_lock();
Benjamin Thery70a269e2009-01-22 04:56:15 +0000945 if (init_net.ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800947 return -EADDRINUSE;
948 }
949
950 ret = ip_ra_control(sk, 1, mrtsock_destruct);
951 if (ret == 0) {
952 write_lock_bh(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000953 init_net.ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800954 write_unlock_bh(&mrt_lock);
955
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900956 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800957 }
958 rtnl_unlock();
959 return ret;
960 case MRT_DONE:
Benjamin Thery70a269e2009-01-22 04:56:15 +0000961 if (sk != init_net.ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800962 return -EACCES;
963 return ip_ra_control(sk, 0, NULL);
964 case MRT_ADD_VIF:
965 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800966 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800967 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800968 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800969 return -EFAULT;
970 if (vif.vifc_vifi >= MAXVIFS)
971 return -ENFILE;
972 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800973 if (optname == MRT_ADD_VIF) {
Benjamin Thery70a269e2009-01-22 04:56:15 +0000974 ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800975 } else {
Wang Chen7dc00c82008-07-14 20:56:34 -0700976 ret = vif_delete(vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800977 }
978 rtnl_unlock();
979 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980
981 /*
982 * Manipulate the forwarding caches. These live
983 * in a sort of kernel/user symbiosis.
984 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800985 case MRT_ADD_MFC:
986 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -0800987 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800988 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800989 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800990 return -EFAULT;
991 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800992 if (optname == MRT_DEL_MFC)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800993 ret = ipmr_mfc_delete(&mfc);
994 else
Benjamin Thery70a269e2009-01-22 04:56:15 +0000995 ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800996 rtnl_unlock();
997 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 /*
999 * Control PIM assert.
1000 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001001 case MRT_ASSERT:
1002 {
1003 int v;
1004 if (get_user(v,(int __user *)optval))
1005 return -EFAULT;
1006 mroute_do_assert=(v)?1:0;
1007 return 0;
1008 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001010 case MRT_PIM:
1011 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001012 int v;
1013
Stephen Hemminger132adf52007-03-08 20:44:43 -08001014 if (get_user(v,(int __user *)optval))
1015 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001016 v = (v) ? 1 : 0;
1017
Stephen Hemminger132adf52007-03-08 20:44:43 -08001018 rtnl_lock();
1019 ret = 0;
1020 if (v != mroute_do_pim) {
1021 mroute_do_pim = v;
1022 mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023#ifdef CONFIG_IP_PIMSM_V2
Stephen Hemminger132adf52007-03-08 20:44:43 -08001024 if (mroute_do_pim)
1025 ret = inet_add_protocol(&pim_protocol,
1026 IPPROTO_PIM);
1027 else
1028 ret = inet_del_protocol(&pim_protocol,
1029 IPPROTO_PIM);
1030 if (ret < 0)
1031 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001034 rtnl_unlock();
1035 return ret;
1036 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001038 /*
1039 * Spurious command, or MRT_VERSION which you cannot
1040 * set.
1041 */
1042 default:
1043 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 }
1045}
1046
1047/*
1048 * Getsock opt support for the multicast routing system.
1049 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001050
Jianjun Kongc354e122008-11-03 00:28:02 -08001051int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052{
1053 int olr;
1054 int val;
1055
Jianjun Kongc354e122008-11-03 00:28:02 -08001056 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057#ifdef CONFIG_IP_PIMSM
1058 optname!=MRT_PIM &&
1059#endif
1060 optname!=MRT_ASSERT)
1061 return -ENOPROTOOPT;
1062
1063 if (get_user(olr, optlen))
1064 return -EFAULT;
1065
1066 olr = min_t(unsigned int, olr, sizeof(int));
1067 if (olr < 0)
1068 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001069
Jianjun Kongc354e122008-11-03 00:28:02 -08001070 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001072 if (optname == MRT_VERSION)
1073 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001075 else if (optname == MRT_PIM)
1076 val = mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077#endif
1078 else
Jianjun Kongc354e122008-11-03 00:28:02 -08001079 val = mroute_do_assert;
1080 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 return -EFAULT;
1082 return 0;
1083}
1084
1085/*
1086 * The IP multicast ioctl support routines.
1087 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1090{
1091 struct sioc_sg_req sr;
1092 struct sioc_vif_req vr;
1093 struct vif_device *vif;
1094 struct mfc_cache *c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001095
Stephen Hemminger132adf52007-03-08 20:44:43 -08001096 switch (cmd) {
1097 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001098 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001099 return -EFAULT;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001100 if (vr.vifi >= init_net.ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001101 return -EINVAL;
1102 read_lock(&mrt_lock);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001103 vif = &init_net.ipv4.vif_table[vr.vifi];
1104 if (VIF_EXISTS(&init_net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001105 vr.icount = vif->pkt_in;
1106 vr.ocount = vif->pkt_out;
1107 vr.ibytes = vif->bytes_in;
1108 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001110
Jianjun Kongc354e122008-11-03 00:28:02 -08001111 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001113 return 0;
1114 }
1115 read_unlock(&mrt_lock);
1116 return -EADDRNOTAVAIL;
1117 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001118 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001119 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120
Stephen Hemminger132adf52007-03-08 20:44:43 -08001121 read_lock(&mrt_lock);
1122 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1123 if (c) {
1124 sr.pktcnt = c->mfc_un.res.pkt;
1125 sr.bytecnt = c->mfc_un.res.bytes;
1126 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001128
Jianjun Kongc354e122008-11-03 00:28:02 -08001129 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001130 return -EFAULT;
1131 return 0;
1132 }
1133 read_unlock(&mrt_lock);
1134 return -EADDRNOTAVAIL;
1135 default:
1136 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 }
1138}
1139
1140
1141static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1142{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001143 struct net_device *dev = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 struct vif_device *v;
1145 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001146
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -07001147 if (!net_eq(dev_net(dev), &init_net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001148 return NOTIFY_DONE;
1149
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 if (event != NETDEV_UNREGISTER)
1151 return NOTIFY_DONE;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001152 v = &init_net.ipv4.vif_table[0];
1153 for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001154 if (v->dev == dev)
Wang Chen7dc00c82008-07-14 20:56:34 -07001155 vif_delete(ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 }
1157 return NOTIFY_DONE;
1158}
1159
1160
Jianjun Kongc354e122008-11-03 00:28:02 -08001161static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 .notifier_call = ipmr_device_event,
1163};
1164
1165/*
1166 * Encapsulate a packet by attaching a valid IPIP header to it.
1167 * This avoids tunnel drivers and other mess and gives us the speed so
1168 * important for multicast video.
1169 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001170
Al Viro114c7842006-09-27 18:39:29 -07001171static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001173 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001174 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001175
1176 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001177 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001178 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001179 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001182 iph->tos = old_iph->tos;
1183 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 iph->frag_off = 0;
1185 iph->daddr = daddr;
1186 iph->saddr = saddr;
1187 iph->protocol = IPPROTO_IPIP;
1188 iph->ihl = 5;
1189 iph->tot_len = htons(skb->len);
1190 ip_select_ident(iph, skb->dst, NULL);
1191 ip_send_check(iph);
1192
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1194 nf_reset(skb);
1195}
1196
1197static inline int ipmr_forward_finish(struct sk_buff *skb)
1198{
1199 struct ip_options * opt = &(IPCB(skb)->opt);
1200
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001201 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202
1203 if (unlikely(opt->optlen))
1204 ip_forward_options(skb);
1205
1206 return dst_output(skb);
1207}
1208
1209/*
1210 * Processing handlers for ipmr_forward
1211 */
1212
1213static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1214{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001215 const struct iphdr *iph = ip_hdr(skb);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001216 struct vif_device *vif = &init_net.ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 struct net_device *dev;
1218 struct rtable *rt;
1219 int encap = 0;
1220
1221 if (vif->dev == NULL)
1222 goto out_free;
1223
1224#ifdef CONFIG_IP_PIMSM
1225 if (vif->flags & VIFF_REGISTER) {
1226 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001227 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001228 vif->dev->stats.tx_bytes += skb->len;
1229 vif->dev->stats.tx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1231 kfree_skb(skb);
1232 return;
1233 }
1234#endif
1235
1236 if (vif->flags&VIFF_TUNNEL) {
1237 struct flowi fl = { .oif = vif->link,
1238 .nl_u = { .ip4_u =
1239 { .daddr = vif->remote,
1240 .saddr = vif->local,
1241 .tos = RT_TOS(iph->tos) } },
1242 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001243 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 goto out_free;
1245 encap = sizeof(struct iphdr);
1246 } else {
1247 struct flowi fl = { .oif = vif->link,
1248 .nl_u = { .ip4_u =
1249 { .daddr = iph->daddr,
1250 .tos = RT_TOS(iph->tos) } },
1251 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -08001252 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 goto out_free;
1254 }
1255
1256 dev = rt->u.dst.dev;
1257
1258 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1259 /* Do not fragment multicasts. Alas, IPv4 does not
1260 allow to send ICMP, so that packets will disappear
1261 to blackhole.
1262 */
1263
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001264 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 ip_rt_put(rt);
1266 goto out_free;
1267 }
1268
1269 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1270
1271 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001272 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 goto out_free;
1274 }
1275
1276 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001277 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
1279 dst_release(skb->dst);
1280 skb->dst = &rt->u.dst;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001281 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
1283 /* FIXME: forward and output firewalls used to be called here.
1284 * What do we do with netfilter? -- RR */
1285 if (vif->flags & VIFF_TUNNEL) {
1286 ip_encap(skb, vif->local, vif->remote);
1287 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001288 vif->dev->stats.tx_packets++;
1289 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 }
1291
1292 IPCB(skb)->flags |= IPSKB_FORWARDED;
1293
1294 /*
1295 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1296 * not only before forwarding, but after forwarding on all output
1297 * interfaces. It is clear, if mrouter runs a multicasting
1298 * program, it should receive packets not depending to what interface
1299 * program is joined.
1300 * If we will not make it, the program will have to join on all
1301 * interfaces. On the other hand, multihoming host (or router, but
1302 * not mrouter) cannot join to more than one interface - it will
1303 * result in receiving multiple packets.
1304 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001305 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 ipmr_forward_finish);
1307 return;
1308
1309out_free:
1310 kfree_skb(skb);
1311 return;
1312}
1313
1314static int ipmr_find_vif(struct net_device *dev)
1315{
1316 int ct;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001317 for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) {
1318 if (init_net.ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 break;
1320 }
1321 return ct;
1322}
1323
1324/* "local" means that we should preserve one skb (for local delivery) */
1325
1326static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1327{
1328 int psend = -1;
1329 int vif, ct;
1330
1331 vif = cache->mfc_parent;
1332 cache->mfc_un.res.pkt++;
1333 cache->mfc_un.res.bytes += skb->len;
1334
1335 /*
1336 * Wrong interface: drop packet and (maybe) send PIM assert.
1337 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001338 if (init_net.ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 int true_vifi;
1340
Eric Dumazetee6b9672008-03-05 18:30:47 -08001341 if (skb->rtable->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 /* It is our own packet, looped back.
1343 Very complicated situation...
1344
1345 The best workaround until routing daemons will be
1346 fixed is not to redistribute packet, if it was
1347 send through wrong interface. It means, that
1348 multicast applications WILL NOT work for
1349 (S,G), which have default multicast route pointing
1350 to wrong oif. In any case, it is not a good
1351 idea to use multicasting applications on router.
1352 */
1353 goto dont_forward;
1354 }
1355
1356 cache->mfc_un.res.wrong_if++;
1357 true_vifi = ipmr_find_vif(skb->dev);
1358
1359 if (true_vifi >= 0 && mroute_do_assert &&
1360 /* pimsm uses asserts, when switching from RPT to SPT,
1361 so that we cannot check that packet arrived on an oif.
1362 It is bad, but otherwise we would need to move pretty
1363 large chunk of pimd to kernel. Ough... --ANK
1364 */
1365 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001366 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1368 cache->mfc_un.res.last_assert = jiffies;
1369 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1370 }
1371 goto dont_forward;
1372 }
1373
Benjamin Therycf958ae32009-01-22 04:56:16 +00001374 init_net.ipv4.vif_table[vif].pkt_in++;
1375 init_net.ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376
1377 /*
1378 * Forward the frame
1379 */
1380 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001381 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 if (psend != -1) {
1383 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1384 if (skb2)
1385 ipmr_queue_xmit(skb2, cache, psend);
1386 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001387 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 }
1389 }
1390 if (psend != -1) {
1391 if (local) {
1392 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1393 if (skb2)
1394 ipmr_queue_xmit(skb2, cache, psend);
1395 } else {
1396 ipmr_queue_xmit(skb, cache, psend);
1397 return 0;
1398 }
1399 }
1400
1401dont_forward:
1402 if (!local)
1403 kfree_skb(skb);
1404 return 0;
1405}
1406
1407
1408/*
1409 * Multicast packets for forwarding arrive here
1410 */
1411
1412int ip_mr_input(struct sk_buff *skb)
1413{
1414 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001415 int local = skb->rtable->rt_flags&RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416
1417 /* Packet is looped back after forward, it should not be
1418 forwarded second time, but still can be delivered locally.
1419 */
1420 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1421 goto dont_forward;
1422
1423 if (!local) {
1424 if (IPCB(skb)->opt.router_alert) {
1425 if (ip_call_ra_chain(skb))
1426 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001427 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 /* IGMPv1 (and broken IGMPv2 implementations sort of
1429 Cisco IOS <= 11.2(8)) do not put router alert
1430 option to IGMP packets destined to routable
1431 groups. It is very bad, because it means
1432 that we can forward NO IGMP messages.
1433 */
1434 read_lock(&mrt_lock);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001435 if (init_net.ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001436 nf_reset(skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +00001437 raw_rcv(init_net.ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 read_unlock(&mrt_lock);
1439 return 0;
1440 }
1441 read_unlock(&mrt_lock);
1442 }
1443 }
1444
1445 read_lock(&mrt_lock);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001446 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447
1448 /*
1449 * No usable cache entry
1450 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001451 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 int vif;
1453
1454 if (local) {
1455 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1456 ip_local_deliver(skb);
1457 if (skb2 == NULL) {
1458 read_unlock(&mrt_lock);
1459 return -ENOBUFS;
1460 }
1461 skb = skb2;
1462 }
1463
1464 vif = ipmr_find_vif(skb->dev);
1465 if (vif >= 0) {
1466 int err = ipmr_cache_unresolved(vif, skb);
1467 read_unlock(&mrt_lock);
1468
1469 return err;
1470 }
1471 read_unlock(&mrt_lock);
1472 kfree_skb(skb);
1473 return -ENODEV;
1474 }
1475
1476 ip_mr_forward(skb, cache, local);
1477
1478 read_unlock(&mrt_lock);
1479
1480 if (local)
1481 return ip_local_deliver(skb);
1482
1483 return 0;
1484
1485dont_forward:
1486 if (local)
1487 return ip_local_deliver(skb);
1488 kfree_skb(skb);
1489 return 0;
1490}
1491
#ifdef CONFIG_IP_PIMSM
/*
 *	Common tail of PIM register reception.  @pimlen is the size of the PIM
 *	header that precedes the encapsulated IP packet.
 *
 *	Returns 0 when the inner packet was re-injected through netif_rx() on
 *	the register vif's device, 1 when the packet was rejected; in the
 *	latter case the skb is left for the caller (the visible callers free
 *	it).
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct iphdr *inner;
	struct net_device *dev = NULL;

	inner = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(inner->daddr) ||
	    inner->tot_len == 0 ||
	    ntohs(inner->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device while under the lock. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0) {
		dev = init_net.ipv4.vif_table[reg_vif_num].dev;
		if (dev)
			dev_hold(dev);
	}
	read_unlock(&mrt_lock);

	if (dev == NULL)
		return 1;

	/* Strip the outer headers so the inner IP packet starts the skb. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb_reset_network_header(skb);

	skb->dev = dev;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;

	dst_release(skb->dst);
	skb->dst = NULL;

	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(dev);

	return 0;
}
#endif
1538
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Only PIMv1 REGISTER messages are processed, and only while PIM mode is
 * enabled; everything else is freed.  Always returns 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *hdr;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct iphdr)))
		goto drop;

	hdr = igmp_hdr(skb);

	if (!mroute_do_pim)
		goto drop;
	if (hdr->group != PIM_V1_VERSION || hdr->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() returns 0 once it has taken the skb. */
	if (!__pim_rcv(skb, sizeof(*hdr)))
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1564
#ifdef CONFIG_IP_PIMSM_V2
/*
 *	PIMv2 receive handler.  Accepts only non-NULL REGISTER messages whose
 *	checksum is valid either over the PIM header alone or over the whole
 *	packet; everything else is freed.  Always returns 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *hdr;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct iphdr)))
		goto drop;

	hdr = (struct pimreghdr *)skb_transport_header(skb);

	if (hdr->type != ((PIM_VERSION << 4) | (PIM_REGISTER)))
		goto drop;
	if (hdr->flags & PIM_NULL_REGISTER)
		goto drop;
	/* Bad only if BOTH the header-only and full-packet sums fail. */
	if (ip_compute_csum((void *)hdr, sizeof(*hdr)) != 0 &&
	    csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	/* __pim_rcv() returns 0 once it has taken the skb. */
	if (!__pim_rcv(skb, sizeof(*hdr)))
		return 0;

drop:
	kfree_skb(skb);
	return 0;
}
#endif
1587
1588static int
1589ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1590{
1591 int ct;
1592 struct rtnexthop *nhp;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001593 struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001594 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 struct rtattr *mp_head;
1596
1597 if (dev)
1598 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1599
Jianjun Kongc354e122008-11-03 00:28:02 -08001600 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601
1602 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1603 if (c->mfc_un.res.ttls[ct] < 255) {
1604 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1605 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001606 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 nhp->rtnh_flags = 0;
1608 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Therycf958ae32009-01-22 04:56:16 +00001609 nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 nhp->rtnh_len = sizeof(*nhp);
1611 }
1612 }
1613 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001614 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 rtm->rtm_type = RTN_MULTICAST;
1616 return 1;
1617
1618rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001619 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 return -EMSGSIZE;
1621}
1622
1623int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1624{
1625 int err;
1626 struct mfc_cache *cache;
Eric Dumazetee6b9672008-03-05 18:30:47 -08001627 struct rtable *rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628
1629 read_lock(&mrt_lock);
1630 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1631
Jianjun Kongc354e122008-11-03 00:28:02 -08001632 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001633 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001634 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 struct net_device *dev;
1636 int vif;
1637
1638 if (nowait) {
1639 read_unlock(&mrt_lock);
1640 return -EAGAIN;
1641 }
1642
1643 dev = skb->dev;
1644 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1645 read_unlock(&mrt_lock);
1646 return -ENODEV;
1647 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001648 skb2 = skb_clone(skb, GFP_ATOMIC);
1649 if (!skb2) {
1650 read_unlock(&mrt_lock);
1651 return -ENOMEM;
1652 }
1653
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001654 skb_push(skb2, sizeof(struct iphdr));
1655 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001656 iph = ip_hdr(skb2);
1657 iph->ihl = sizeof(struct iphdr) >> 2;
1658 iph->saddr = rt->rt_src;
1659 iph->daddr = rt->rt_dst;
1660 iph->version = 0;
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001661 err = ipmr_cache_unresolved(vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 read_unlock(&mrt_lock);
1663 return err;
1664 }
1665
1666 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1667 cache->mfc_flags |= MFC_NOTIFY;
1668 err = ipmr_fill_mroute(skb, cache, rtm);
1669 read_unlock(&mrt_lock);
1670 return err;
1671}
1672
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001673#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674/*
1675 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1676 */
/* Per-open iteration state for the vif seq_file. */
struct ipmr_vif_iter {
	int ct;		/* index into vif_table of the current position */
};
1680
1681static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1682 loff_t pos)
1683{
Benjamin Therycf958ae32009-01-22 04:56:16 +00001684 for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) {
1685 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001687 if (pos-- == 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00001688 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 }
1690 return NULL;
1691}
1692
1693static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001694 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695{
1696 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001697 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 : SEQ_START_TOKEN;
1699}
1700
1701static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1702{
1703 struct ipmr_vif_iter *iter = seq->private;
1704
1705 ++*pos;
1706 if (v == SEQ_START_TOKEN)
1707 return ipmr_vif_seq_idx(iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001708
Benjamin Therycf958ae32009-01-22 04:56:16 +00001709 while (++iter->ct < init_net.ipv4.maxvif) {
1710 if (!VIF_EXISTS(&init_net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 continue;
Benjamin Therycf958ae32009-01-22 04:56:16 +00001712 return &init_net.ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 }
1714 return NULL;
1715}
1716
1717static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001718 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719{
1720 read_unlock(&mrt_lock);
1721}
1722
1723static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1724{
1725 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001726 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1728 } else {
1729 const struct vif_device *vif = v;
1730 const char *name = vif->dev ? vif->dev->name : "none";
1731
1732 seq_printf(seq,
1733 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Therycf958ae32009-01-22 04:56:16 +00001734 vif - init_net.ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001735 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 vif->bytes_out, vif->pkt_out,
1737 vif->flags, vif->local, vif->remote);
1738 }
1739 return 0;
1740}
1741
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001742static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 .start = ipmr_vif_seq_start,
1744 .next = ipmr_vif_seq_next,
1745 .stop = ipmr_vif_seq_stop,
1746 .show = ipmr_vif_seq_show,
1747};
1748
1749static int ipmr_vif_open(struct inode *inode, struct file *file)
1750{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001751 return seq_open_private(file, &ipmr_vif_seq_ops,
1752 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753}
1754
Arjan van de Ven9a321442007-02-12 00:55:35 -08001755static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 .owner = THIS_MODULE,
1757 .open = ipmr_vif_open,
1758 .read = seq_read,
1759 .llseek = seq_lseek,
1760 .release = seq_release_private,
1761};
1762
/*
 * Per-open iteration state for the mfc cache seq_file.
 *
 * @cache identifies the list currently being walked (the resolved cache
 * array, &mfc_unres_queue, or NULL) and thereby which lock ->stop has to
 * release.
 */
struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;		/* current hash line in the resolved cache array */
};
1767
1768
1769static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1770{
1771 struct mfc_cache *mfc;
1772
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001773 it->cache = init_net.ipv4.mfc_cache_array;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001775 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001776 for (mfc = init_net.ipv4.mfc_cache_array[it->ct];
1777 mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001778 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 return mfc;
1780 read_unlock(&mrt_lock);
1781
1782 it->cache = &mfc_unres_queue;
1783 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001784 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 if (pos-- == 0)
1786 return mfc;
1787 spin_unlock_bh(&mfc_unres_lock);
1788
1789 it->cache = NULL;
1790 return NULL;
1791}
1792
1793
1794static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1795{
1796 struct ipmr_mfc_iter *it = seq->private;
1797 it->cache = NULL;
1798 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001799 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 : SEQ_START_TOKEN;
1801}
1802
1803static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1804{
1805 struct mfc_cache *mfc = v;
1806 struct ipmr_mfc_iter *it = seq->private;
1807
1808 ++*pos;
1809
1810 if (v == SEQ_START_TOKEN)
1811 return ipmr_mfc_seq_idx(seq->private, 0);
1812
1813 if (mfc->next)
1814 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001815
1816 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 goto end_of_list;
1818
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001819 BUG_ON(it->cache != init_net.ipv4.mfc_cache_array);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
1821 while (++it->ct < MFC_LINES) {
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001822 mfc = init_net.ipv4.mfc_cache_array[it->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 if (mfc)
1824 return mfc;
1825 }
1826
1827 /* exhausted cache_array, show unresolved */
1828 read_unlock(&mrt_lock);
1829 it->cache = &mfc_unres_queue;
1830 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001831
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 spin_lock_bh(&mfc_unres_lock);
1833 mfc = mfc_unres_queue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001834 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 return mfc;
1836
1837 end_of_list:
1838 spin_unlock_bh(&mfc_unres_lock);
1839 it->cache = NULL;
1840
1841 return NULL;
1842}
1843
1844static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1845{
1846 struct ipmr_mfc_iter *it = seq->private;
1847
1848 if (it->cache == &mfc_unres_queue)
1849 spin_unlock_bh(&mfc_unres_lock);
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001850 else if (it->cache == init_net.ipv4.mfc_cache_array)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 read_unlock(&mrt_lock);
1852}
1853
1854static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1855{
1856 int n;
1857
1858 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001859 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1861 } else {
1862 const struct mfc_cache *mfc = v;
1863 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001864
Benjamin Thery999890b2008-12-03 22:22:16 -08001865 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 (unsigned long) mfc->mfc_mcastgrp,
1867 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001868 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
1870 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001871 seq_printf(seq, " %8lu %8lu %8lu",
1872 mfc->mfc_un.res.pkt,
1873 mfc->mfc_un.res.bytes,
1874 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001875 for (n = mfc->mfc_un.res.minvif;
1876 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Therycf958ae32009-01-22 04:56:16 +00001877 if (VIF_EXISTS(&init_net, n) &&
1878 mfc->mfc_un.res.ttls[n] < 255)
1879 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001880 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 n, mfc->mfc_un.res.ttls[n]);
1882 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001883 } else {
1884 /* unresolved mfc_caches don't contain
1885 * pkt, bytes and wrong_if values
1886 */
1887 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 }
1889 seq_putc(seq, '\n');
1890 }
1891 return 0;
1892}
1893
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001894static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895 .start = ipmr_mfc_seq_start,
1896 .next = ipmr_mfc_seq_next,
1897 .stop = ipmr_mfc_seq_stop,
1898 .show = ipmr_mfc_seq_show,
1899};
1900
1901static int ipmr_mfc_open(struct inode *inode, struct file *file)
1902{
Pavel Emelyanovcf7732e2007-10-10 02:29:29 -07001903 return seq_open_private(file, &ipmr_mfc_seq_ops,
1904 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905}
1906
Arjan van de Ven9a321442007-02-12 00:55:35 -08001907static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 .owner = THIS_MODULE,
1909 .open = ipmr_mfc_open,
1910 .read = seq_read,
1911 .llseek = seq_lseek,
1912 .release = seq_release_private,
1913};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001914#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915
1916#ifdef CONFIG_IP_PIMSM_V2
1917static struct net_protocol pim_protocol = {
1918 .handler = pim_rcv,
1919};
1920#endif
1921
1922
1923/*
1924 * Setup for IP multicast routing
1925 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001926static int __net_init ipmr_net_init(struct net *net)
1927{
1928 int err = 0;
1929
1930 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1931 GFP_KERNEL);
1932 if (!net->ipv4.vif_table) {
1933 err = -ENOMEM;
1934 goto fail;
1935 }
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001936
1937 /* Forwarding cache */
1938 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1939 sizeof(struct mfc_cache *),
1940 GFP_KERNEL);
1941 if (!net->ipv4.mfc_cache_array) {
1942 err = -ENOMEM;
1943 goto fail_mfc_cache;
1944 }
1945 return 0;
1946
1947fail_mfc_cache:
1948 kfree(net->ipv4.vif_table);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001949fail:
1950 return err;
1951}
1952
1953static void __net_exit ipmr_net_exit(struct net *net)
1954{
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001955 kfree(net->ipv4.mfc_cache_array);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001956 kfree(net->ipv4.vif_table);
1957}
1958
1959static struct pernet_operations ipmr_net_ops = {
1960 .init = ipmr_net_init,
1961 .exit = ipmr_net_exit,
1962};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001963
Wang Chen03d2f892008-07-03 12:13:36 +08001964int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965{
Wang Chen03d2f892008-07-03 12:13:36 +08001966 int err;
1967
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1969 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07001970 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09001971 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08001972 if (!mrt_cachep)
1973 return -ENOMEM;
1974
Benjamin Therycf958ae32009-01-22 04:56:16 +00001975 err = register_pernet_subsys(&ipmr_net_ops);
1976 if (err)
1977 goto reg_pernet_fail;
1978
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08001979 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08001980 err = register_netdevice_notifier(&ip_mr_notifier);
1981 if (err)
1982 goto reg_notif_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001983#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001984 err = -ENOMEM;
1985 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1986 goto proc_vif_fail;
1987 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1988 goto proc_cache_fail;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001989#endif
Wang Chen03d2f892008-07-03 12:13:36 +08001990 return 0;
Wang Chen03d2f892008-07-03 12:13:36 +08001991#ifdef CONFIG_PROC_FS
Wang Chen03d2f892008-07-03 12:13:36 +08001992proc_cache_fail:
1993 proc_net_remove(&init_net, "ip_mr_vif");
Benjamin Theryc3e38892008-11-19 14:07:41 -08001994proc_vif_fail:
1995 unregister_netdevice_notifier(&ip_mr_notifier);
Wang Chen03d2f892008-07-03 12:13:36 +08001996#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08001997reg_notif_fail:
1998 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001999 unregister_pernet_subsys(&ipmr_net_ops);
2000reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002001 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002002 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003}