blob: 9a8da5ed92b7cab3c4883b5866c3a69e655a7278 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020050#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <net/ip.h>
52#include <net/protocol.h>
53#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020054#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <net/sock.h>
56#include <net/icmp.h>
57#include <net/udp.h>
58#include <net/raw.h>
59#include <linux/notifier.h>
60#include <linux/if_arp.h>
61#include <linux/netfilter_ipv4.h>
62#include <net/ipip.h>
63#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070064#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1
68#endif
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74static DEFINE_RWLOCK(mrt_lock);
75
76/*
77 * Multicast router control variables
78 */
79
Benjamin Therycf958ae32009-01-22 04:56:16 +000080#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
Linus Torvalds1da177e2005-04-16 15:20:36 -070082static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock);
86
87/* We return to original Alan's scheme. Hash table of resolved
88 entries is changed only in process context and protected
89 with weak lock mrt_lock. Queue of unresolved entries is protected
90 with strong spinlock mfc_unres_lock.
91
92 In this case data path is free of exclusive locks at all.
93 */
94
Christoph Lametere18b8902006-12-06 20:33:20 -080095static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
Benjamin Thery4feb88e2009-01-22 04:56:23 +000098static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102#ifdef CONFIG_IP_PIMSM_V2
103static struct net_protocol pim_protocol;
104#endif
105
106static struct timer_list ipmr_expire_timer;
107
108/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109
Wang Chend6070322008-07-14 20:55:26 -0700110static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000112 struct net *net = dev_net(dev);
113
Wang Chend6070322008-07-14 20:55:26 -0700114 dev_close(dev);
115
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000116 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700117 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800118 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700119 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700120 struct ip_tunnel_parm p;
121
122 memset(&p, 0, sizeof(p));
123 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125 p.iph.version = 4;
126 p.iph.ihl = 5;
127 p.iph.protocol = IPPROTO_IPIP;
128 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800131 if (ops->ndo_do_ioctl) {
132 mm_segment_t oldfs = get_fs();
133
134 set_fs(KERNEL_DS);
135 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136 set_fs(oldfs);
137 }
Wang Chend6070322008-07-14 20:55:26 -0700138 }
139}
140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000142struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143{
144 struct net_device *dev;
145
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000146 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
148 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800149 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 int err;
151 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 struct ip_tunnel_parm p;
153 struct in_device *in_dev;
154
155 memset(&p, 0, sizeof(p));
156 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158 p.iph.version = 4;
159 p.iph.ihl = 5;
160 p.iph.protocol = IPPROTO_IPIP;
161 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800162 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800164 if (ops->ndo_do_ioctl) {
165 mm_segment_t oldfs = get_fs();
166
167 set_fs(KERNEL_DS);
168 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169 set_fs(oldfs);
170 } else
171 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
173 dev = NULL;
174
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000175 if (err == 0 &&
176 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 dev->flags |= IFF_MULTICAST;
178
Herbert Xue5ed6392005-10-03 14:35:55 -0700179 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700180 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700182
183 ipv4_devconf_setall(in_dev);
184 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185
186 if (dev_open(dev))
187 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700188 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
190 }
191 return dev;
192
193failure:
194 /* allow the register to be completed before unregistering. */
195 rtnl_unlock();
196 rtnl_lock();
197
198 unregister_netdevice(dev);
199 return NULL;
200}
201
202#ifdef CONFIG_IP_PIMSM
203
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000206 struct net *net = dev_net(dev);
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700209 dev->stats.tx_bytes += skb->len;
210 dev->stats.tx_packets++;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000211 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 read_unlock(&mrt_lock);
214 kfree_skb(skb);
215 return 0;
216}
217
Stephen Hemminger007c3832008-11-20 20:28:35 -0800218static const struct net_device_ops reg_vif_netdev_ops = {
219 .ndo_start_xmit = reg_vif_xmit,
220};
221
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222static void reg_vif_setup(struct net_device *dev)
223{
224 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800225 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800227 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700229 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230}
231
Tom Goff403dbb92009-06-14 03:16:13 -0700232static struct net_device *ipmr_reg_vif(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233{
234 struct net_device *dev;
235 struct in_device *in_dev;
236
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700237 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
239 if (dev == NULL)
240 return NULL;
241
Tom Goff403dbb92009-06-14 03:16:13 -0700242 dev_net_set(dev, net);
243
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 if (register_netdevice(dev)) {
245 free_netdev(dev);
246 return NULL;
247 }
248 dev->iflink = 0;
249
Herbert Xu71e27da2007-06-04 23:36:06 -0700250 rcu_read_lock();
251 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
252 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700254 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255
Herbert Xu71e27da2007-06-04 23:36:06 -0700256 ipv4_devconf_setall(in_dev);
257 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
258 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 if (dev_open(dev))
261 goto failure;
262
Wang Chen7dc00c82008-07-14 20:56:34 -0700263 dev_hold(dev);
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 return dev;
266
267failure:
268 /* allow the register to be completed before unregistering. */
269 rtnl_unlock();
270 rtnl_lock();
271
272 unregister_netdevice(dev);
273 return NULL;
274}
275#endif
276
277/*
278 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700279 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000282static int vif_delete(struct net *net, int vifi, int notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283{
284 struct vif_device *v;
285 struct net_device *dev;
286 struct in_device *in_dev;
287
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000288 if (vifi < 0 || vifi >= net->ipv4.maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 return -EADDRNOTAVAIL;
290
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000291 v = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292
293 write_lock_bh(&mrt_lock);
294 dev = v->dev;
295 v->dev = NULL;
296
297 if (!dev) {
298 write_unlock_bh(&mrt_lock);
299 return -EADDRNOTAVAIL;
300 }
301
302#ifdef CONFIG_IP_PIMSM
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000303 if (vifi == net->ipv4.mroute_reg_vif_num)
304 net->ipv4.mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305#endif
306
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000307 if (vifi+1 == net->ipv4.maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 int tmp;
309 for (tmp=vifi-1; tmp>=0; tmp--) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000310 if (VIF_EXISTS(net, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 break;
312 }
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000313 net->ipv4.maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 }
315
316 write_unlock_bh(&mrt_lock);
317
318 dev_set_allmulti(dev, -1);
319
Herbert Xue5ed6392005-10-03 14:35:55 -0700320 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700321 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 ip_rt_multicast_event(in_dev);
323 }
324
Wang Chen7dc00c82008-07-14 20:56:34 -0700325 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 unregister_netdevice(dev);
327
328 dev_put(dev);
329 return 0;
330}
331
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000332static inline void ipmr_cache_free(struct mfc_cache *c)
333{
334 release_net(mfc_net(c));
335 kmem_cache_free(mrt_cachep, c);
336}
337
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338/* Destroy an unresolved cache entry, killing queued skbs
339 and reporting error to netlink readers.
340 */
341
342static void ipmr_destroy_unres(struct mfc_cache *c)
343{
344 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700345 struct nlmsgerr *e;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000346 struct net *net = mfc_net(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000348 atomic_dec(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
Jianjun Kongc354e122008-11-03 00:28:02 -0800350 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700351 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
353 nlh->nlmsg_type = NLMSG_ERROR;
354 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
355 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700356 e = NLMSG_DATA(nlh);
357 e->error = -ETIMEDOUT;
358 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700359
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000360 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 } else
362 kfree_skb(skb);
363 }
364
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000365 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366}
367
368
369/* Single timer process for all the unresolved queue. */
370
371static void ipmr_expire_process(unsigned long dummy)
372{
373 unsigned long now;
374 unsigned long expires;
375 struct mfc_cache *c, **cp;
376
377 if (!spin_trylock(&mfc_unres_lock)) {
378 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
379 return;
380 }
381
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000382 if (mfc_unres_queue == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 goto out;
384
385 now = jiffies;
386 expires = 10*HZ;
387 cp = &mfc_unres_queue;
388
389 while ((c=*cp) != NULL) {
390 if (time_after(c->mfc_un.unres.expires, now)) {
391 unsigned long interval = c->mfc_un.unres.expires - now;
392 if (interval < expires)
393 expires = interval;
394 cp = &c->next;
395 continue;
396 }
397
398 *cp = c->next;
399
400 ipmr_destroy_unres(c);
401 }
402
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000403 if (mfc_unres_queue != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 mod_timer(&ipmr_expire_timer, jiffies + expires);
405
406out:
407 spin_unlock(&mfc_unres_lock);
408}
409
410/* Fill oifs list. It is called under write locked mrt_lock. */
411
Baruch Evend1b04c02005-07-30 17:41:59 -0700412static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413{
414 int vifi;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000415 struct net *net = mfc_net(cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
417 cache->mfc_un.res.minvif = MAXVIFS;
418 cache->mfc_un.res.maxvif = 0;
419 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
420
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000421 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
422 if (VIF_EXISTS(net, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000423 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
425 if (cache->mfc_un.res.minvif > vifi)
426 cache->mfc_un.res.minvif = vifi;
427 if (cache->mfc_un.res.maxvif <= vifi)
428 cache->mfc_un.res.maxvif = vifi + 1;
429 }
430 }
431}
432
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000433static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434{
435 int vifi = vifc->vifc_vifi;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000436 struct vif_device *v = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 struct net_device *dev;
438 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700439 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
441 /* Is vif busy ? */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000442 if (VIF_EXISTS(net, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 return -EADDRINUSE;
444
445 switch (vifc->vifc_flags) {
446#ifdef CONFIG_IP_PIMSM
447 case VIFF_REGISTER:
448 /*
449 * Special Purpose VIF in PIM
450 * All the packets will be sent to the daemon
451 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000452 if (net->ipv4.mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 return -EADDRINUSE;
Tom Goff403dbb92009-06-14 03:16:13 -0700454 dev = ipmr_reg_vif(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 if (!dev)
456 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700457 err = dev_set_allmulti(dev, 1);
458 if (err) {
459 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700460 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700461 return err;
462 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 break;
464#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900465 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000466 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 if (!dev)
468 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700469 err = dev_set_allmulti(dev, 1);
470 if (err) {
471 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700472 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700473 return err;
474 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 break;
476 case 0:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000477 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 if (!dev)
479 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700480 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700481 if (err) {
482 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700483 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700484 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 break;
486 default:
487 return -EINVAL;
488 }
489
Herbert Xue5ed6392005-10-03 14:35:55 -0700490 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 return -EADDRNOTAVAIL;
Herbert Xu42f811b2007-06-04 23:34:44 -0700492 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 ip_rt_multicast_event(in_dev);
494
495 /*
496 * Fill in the VIF structures
497 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800498 v->rate_limit = vifc->vifc_rate_limit;
499 v->local = vifc->vifc_lcl_addr.s_addr;
500 v->remote = vifc->vifc_rmt_addr.s_addr;
501 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 if (!mrtsock)
503 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800504 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 v->bytes_in = 0;
506 v->bytes_out = 0;
507 v->pkt_in = 0;
508 v->pkt_out = 0;
509 v->link = dev->ifindex;
510 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
511 v->link = dev->iflink;
512
513 /* And finish update writing critical data */
514 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800515 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516#ifdef CONFIG_IP_PIMSM
517 if (v->flags&VIFF_REGISTER)
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000518 net->ipv4.mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519#endif
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000520 if (vifi+1 > net->ipv4.maxvif)
521 net->ipv4.maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 write_unlock_bh(&mrt_lock);
523 return 0;
524}
525
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000526static struct mfc_cache *ipmr_cache_find(struct net *net,
527 __be32 origin,
528 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529{
Jianjun Kongc354e122008-11-03 00:28:02 -0800530 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 struct mfc_cache *c;
532
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000533 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
535 break;
536 }
537 return c;
538}
539
540/*
541 * Allocate a multicast cache entry
542 */
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000543static struct mfc_cache *ipmr_cache_alloc(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544{
Jianjun Kongc354e122008-11-03 00:28:02 -0800545 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
546 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 c->mfc_un.res.minvif = MAXVIFS;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000549 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 return c;
551}
552
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000553static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554{
Jianjun Kongc354e122008-11-03 00:28:02 -0800555 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
556 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 skb_queue_head_init(&c->mfc_un.unres.unresolved);
559 c->mfc_un.unres.expires = jiffies + 10*HZ;
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000560 mfc_net_set(c, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 return c;
562}
563
564/*
565 * A cache entry has gone into a resolved state from queued
566 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900567
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
569{
570 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700571 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
573 /*
574 * Play the pending entries through our router
575 */
576
Jianjun Kongc354e122008-11-03 00:28:02 -0800577 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700578 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
580
581 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700582 nlh->nlmsg_len = (skb_tail_pointer(skb) -
583 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 } else {
585 nlh->nlmsg_type = NLMSG_ERROR;
586 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
587 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700588 e = NLMSG_DATA(nlh);
589 e->error = -EMSGSIZE;
590 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 }
Thomas Graf2942e902006-08-15 00:30:25 -0700592
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000593 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 } else
595 ip_mr_forward(skb, c, 0);
596 }
597}
598
599/*
600 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
601 * expects the following bizarre scheme.
602 *
603 * Called under mrt_lock.
604 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900605
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000606static int ipmr_cache_report(struct net *net,
607 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608{
609 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300610 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 struct igmphdr *igmp;
612 struct igmpmsg *msg;
613 int ret;
614
615#ifdef CONFIG_IP_PIMSM
616 if (assert == IGMPMSG_WHOLEPKT)
617 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
618 else
619#endif
620 skb = alloc_skb(128, GFP_ATOMIC);
621
Stephen Hemminger132adf52007-03-08 20:44:43 -0800622 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 return -ENOBUFS;
624
625#ifdef CONFIG_IP_PIMSM
626 if (assert == IGMPMSG_WHOLEPKT) {
627 /* Ugly, but we have no choice with this interface.
628 Duplicate old header, fix ihl, length etc.
629 And all this only to mangle msg->im_msgtype and
630 to set msg->im_mbz to "mbz" :-)
631 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300632 skb_push(skb, sizeof(struct iphdr));
633 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300634 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300635 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700636 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 msg->im_msgtype = IGMPMSG_WHOLEPKT;
638 msg->im_mbz = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000639 msg->im_vif = net->ipv4.mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700640 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
641 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
642 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900643 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900645 {
646
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 /*
648 * Copy the IP header
649 */
650
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700651 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300652 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300653 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700654 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
655 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000657 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659 /*
660 * Add our header
661 */
662
Jianjun Kongc354e122008-11-03 00:28:02 -0800663 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 igmp->type =
665 msg->im_msgtype = assert;
666 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700667 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700668 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900669 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000671 if (net->ipv4.mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 kfree_skb(skb);
673 return -EINVAL;
674 }
675
676 /*
677 * Deliver to mrouted
678 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000679 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000680 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 if (net_ratelimit())
682 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
683 kfree_skb(skb);
684 }
685
686 return ret;
687}
688
689/*
690 * Queue a packet for resolution. It gets locked cache entry!
691 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900692
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693static int
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000694ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695{
696 int err;
697 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700698 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 spin_lock_bh(&mfc_unres_lock);
701 for (c=mfc_unres_queue; c; c=c->next) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000702 if (net_eq(mfc_net(c), net) &&
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000703 c->mfc_mcastgrp == iph->daddr &&
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700704 c->mfc_origin == iph->saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 break;
706 }
707
708 if (c == NULL) {
709 /*
710 * Create a new entry if allowable
711 */
712
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000713 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
714 (c = ipmr_cache_alloc_unres(net)) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 spin_unlock_bh(&mfc_unres_lock);
716
717 kfree_skb(skb);
718 return -ENOBUFS;
719 }
720
721 /*
722 * Fill in the new cache entry
723 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700724 c->mfc_parent = -1;
725 c->mfc_origin = iph->saddr;
726 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727
728 /*
729 * Reflect first query at mrouted.
730 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000731 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
732 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900733 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 out - Brad Parker
735 */
736 spin_unlock_bh(&mfc_unres_lock);
737
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000738 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 kfree_skb(skb);
740 return err;
741 }
742
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000743 atomic_inc(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 c->next = mfc_unres_queue;
745 mfc_unres_queue = c;
746
747 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
748 }
749
750 /*
751 * See if we can append the packet
752 */
753 if (c->mfc_un.unres.unresolved.qlen>3) {
754 kfree_skb(skb);
755 err = -ENOBUFS;
756 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -0800757 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 err = 0;
759 }
760
761 spin_unlock_bh(&mfc_unres_lock);
762 return err;
763}
764
765/*
766 * MFC cache manipulation by user space mroute daemon
767 */
768
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000769static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770{
771 int line;
772 struct mfc_cache *c, **cp;
773
Jianjun Kongc354e122008-11-03 00:28:02 -0800774 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000776 for (cp = &net->ipv4.mfc_cache_array[line];
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000777 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
779 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
780 write_lock_bh(&mrt_lock);
781 *cp = c->next;
782 write_unlock_bh(&mrt_lock);
783
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000784 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 return 0;
786 }
787 }
788 return -ENOENT;
789}
790
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000791static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792{
793 int line;
794 struct mfc_cache *uc, *c, **cp;
795
Jianjun Kongc354e122008-11-03 00:28:02 -0800796 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000798 for (cp = &net->ipv4.mfc_cache_array[line];
Benjamin Thery2bb8b262009-01-22 04:56:18 +0000799 (c = *cp) != NULL; cp = &c->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
801 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
802 break;
803 }
804
805 if (c != NULL) {
806 write_lock_bh(&mrt_lock);
807 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700808 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 if (!mrtsock)
810 c->mfc_flags |= MFC_STATIC;
811 write_unlock_bh(&mrt_lock);
812 return 0;
813 }
814
Joe Perchesf97c1e02007-12-16 13:45:43 -0800815 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 return -EINVAL;
817
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000818 c = ipmr_cache_alloc(net);
Jianjun Kongc354e122008-11-03 00:28:02 -0800819 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 return -ENOMEM;
821
Jianjun Kongc354e122008-11-03 00:28:02 -0800822 c->mfc_origin = mfc->mfcc_origin.s_addr;
823 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
824 c->mfc_parent = mfc->mfcc_parent;
Baruch Evend1b04c02005-07-30 17:41:59 -0700825 ipmr_update_thresholds(c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 if (!mrtsock)
827 c->mfc_flags |= MFC_STATIC;
828
829 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000830 c->next = net->ipv4.mfc_cache_array[line];
831 net->ipv4.mfc_cache_array[line] = c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 write_unlock_bh(&mrt_lock);
833
834 /*
835 * Check to see if we resolved a queued list. If so we
836 * need to send on the frames and tidy up.
837 */
838 spin_lock_bh(&mfc_unres_lock);
839 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
840 cp = &uc->next) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000841 if (net_eq(mfc_net(uc), net) &&
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000842 uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
844 *cp = uc->next;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000845 atomic_dec(&net->ipv4.cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 break;
847 }
848 }
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000849 if (mfc_unres_queue == NULL)
850 del_timer(&ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 spin_unlock_bh(&mfc_unres_lock);
852
853 if (uc) {
854 ipmr_cache_resolve(uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000855 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 }
857 return 0;
858}
859
860/*
861 * Close the multicast socket, and clear the vif tables etc
862 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900863
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000864static void mroute_clean_tables(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865{
866 int i;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900867
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 /*
869 * Shut down all active vif entries
870 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000871 for (i = 0; i < net->ipv4.maxvif; i++) {
872 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
873 vif_delete(net, i, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 }
875
876 /*
877 * Wipe the cache
878 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800879 for (i=0; i<MFC_LINES; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 struct mfc_cache *c, **cp;
881
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000882 cp = &net->ipv4.mfc_cache_array[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 while ((c = *cp) != NULL) {
884 if (c->mfc_flags&MFC_STATIC) {
885 cp = &c->next;
886 continue;
887 }
888 write_lock_bh(&mrt_lock);
889 *cp = c->next;
890 write_unlock_bh(&mrt_lock);
891
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000892 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 }
894 }
895
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000896 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000897 struct mfc_cache *c, **cp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898
899 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000900 cp = &mfc_unres_queue;
901 while ((c = *cp) != NULL) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000902 if (!net_eq(mfc_net(c), net)) {
Benjamin Thery1e8fb3b2009-01-22 04:56:19 +0000903 cp = &c->next;
904 continue;
905 }
906 *cp = c->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
908 ipmr_destroy_unres(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 }
910 spin_unlock_bh(&mfc_unres_lock);
911 }
912}
913
914static void mrtsock_destruct(struct sock *sk)
915{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000916 struct net *net = sock_net(sk);
917
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 rtnl_lock();
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000919 if (sk == net->ipv4.mroute_sk) {
920 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921
922 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000923 net->ipv4.mroute_sk = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 write_unlock_bh(&mrt_lock);
925
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000926 mroute_clean_tables(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 }
928 rtnl_unlock();
929}
930
931/*
932 * Socket options and virtual interface manipulation. The whole
933 * virtual interface system is a complete heap, but unfortunately
934 * that's how BSD mrouted happens to think. Maybe one day with a proper
935 * MOSPF/PIM router set up we can clean this up.
936 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900937
Jianjun Kongc354e122008-11-03 00:28:02 -0800938int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939{
940 int ret;
941 struct vifctl vif;
942 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000943 struct net *net = sock_net(sk);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900944
Stephen Hemminger132adf52007-03-08 20:44:43 -0800945 if (optname != MRT_INIT) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000946 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 return -EACCES;
948 }
949
Stephen Hemminger132adf52007-03-08 20:44:43 -0800950 switch (optname) {
951 case MRT_INIT:
952 if (sk->sk_type != SOCK_RAW ||
953 inet_sk(sk)->num != IPPROTO_IGMP)
954 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -0800955 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800956 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957
Stephen Hemminger132adf52007-03-08 20:44:43 -0800958 rtnl_lock();
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000959 if (net->ipv4.mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -0800961 return -EADDRINUSE;
962 }
963
964 ret = ip_ra_control(sk, 1, mrtsock_destruct);
965 if (ret == 0) {
966 write_lock_bh(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000967 net->ipv4.mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800968 write_unlock_bh(&mrt_lock);
969
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000970 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -0800971 }
972 rtnl_unlock();
973 return ret;
974 case MRT_DONE:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000975 if (sk != net->ipv4.mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -0800976 return -EACCES;
977 return ip_ra_control(sk, 0, NULL);
978 case MRT_ADD_VIF:
979 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -0800980 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800981 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -0800982 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -0800983 return -EFAULT;
984 if (vif.vifc_vifi >= MAXVIFS)
985 return -ENFILE;
986 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -0800987 if (optname == MRT_ADD_VIF) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000988 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800989 } else {
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000990 ret = vif_delete(net, vif.vifc_vifi, 0);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800991 }
992 rtnl_unlock();
993 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
995 /*
996 * Manipulate the forwarding caches. These live
997 * in a sort of kernel/user symbiosis.
998 */
Stephen Hemminger132adf52007-03-08 20:44:43 -0800999 case MRT_ADD_MFC:
1000 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001001 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001002 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001003 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001004 return -EFAULT;
1005 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001006 if (optname == MRT_DEL_MFC)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001007 ret = ipmr_mfc_delete(net, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001008 else
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001009 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001010 rtnl_unlock();
1011 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 /*
1013 * Control PIM assert.
1014 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001015 case MRT_ASSERT:
1016 {
1017 int v;
1018 if (get_user(v,(int __user *)optval))
1019 return -EFAULT;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001020 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001021 return 0;
1022 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001024 case MRT_PIM:
1025 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001026 int v;
1027
Stephen Hemminger132adf52007-03-08 20:44:43 -08001028 if (get_user(v,(int __user *)optval))
1029 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001030 v = (v) ? 1 : 0;
1031
Stephen Hemminger132adf52007-03-08 20:44:43 -08001032 rtnl_lock();
1033 ret = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001034 if (v != net->ipv4.mroute_do_pim) {
1035 net->ipv4.mroute_do_pim = v;
1036 net->ipv4.mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001038 rtnl_unlock();
1039 return ret;
1040 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001042 /*
1043 * Spurious command, or MRT_VERSION which you cannot
1044 * set.
1045 */
1046 default:
1047 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 }
1049}
1050
1051/*
1052 * Getsock opt support for the multicast routing system.
1053 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001054
Jianjun Kongc354e122008-11-03 00:28:02 -08001055int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056{
1057 int olr;
1058 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001059 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060
Jianjun Kongc354e122008-11-03 00:28:02 -08001061 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062#ifdef CONFIG_IP_PIMSM
1063 optname!=MRT_PIM &&
1064#endif
1065 optname!=MRT_ASSERT)
1066 return -ENOPROTOOPT;
1067
1068 if (get_user(olr, optlen))
1069 return -EFAULT;
1070
1071 olr = min_t(unsigned int, olr, sizeof(int));
1072 if (olr < 0)
1073 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001074
Jianjun Kongc354e122008-11-03 00:28:02 -08001075 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001077 if (optname == MRT_VERSION)
1078 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001080 else if (optname == MRT_PIM)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001081 val = net->ipv4.mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082#endif
1083 else
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001084 val = net->ipv4.mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001085 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 return -EFAULT;
1087 return 0;
1088}
1089
1090/*
1091 * The IP multicast ioctl support routines.
1092 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001093
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1095{
1096 struct sioc_sg_req sr;
1097 struct sioc_vif_req vr;
1098 struct vif_device *vif;
1099 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001100 struct net *net = sock_net(sk);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001101
Stephen Hemminger132adf52007-03-08 20:44:43 -08001102 switch (cmd) {
1103 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001104 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001105 return -EFAULT;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001106 if (vr.vifi >= net->ipv4.maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001107 return -EINVAL;
1108 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001109 vif = &net->ipv4.vif_table[vr.vifi];
1110 if (VIF_EXISTS(net, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001111 vr.icount = vif->pkt_in;
1112 vr.ocount = vif->pkt_out;
1113 vr.ibytes = vif->bytes_in;
1114 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001116
Jianjun Kongc354e122008-11-03 00:28:02 -08001117 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001119 return 0;
1120 }
1121 read_unlock(&mrt_lock);
1122 return -EADDRNOTAVAIL;
1123 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001124 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001125 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126
Stephen Hemminger132adf52007-03-08 20:44:43 -08001127 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001128 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001129 if (c) {
1130 sr.pktcnt = c->mfc_un.res.pkt;
1131 sr.bytecnt = c->mfc_un.res.bytes;
1132 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001134
Jianjun Kongc354e122008-11-03 00:28:02 -08001135 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001136 return -EFAULT;
1137 return 0;
1138 }
1139 read_unlock(&mrt_lock);
1140 return -EADDRNOTAVAIL;
1141 default:
1142 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 }
1144}
1145
1146
1147static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1148{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001149 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001150 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 struct vif_device *v;
1152 int ct;
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001153
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001154 if (!net_eq(dev_net(dev), net))
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001155 return NOTIFY_DONE;
1156
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 if (event != NETDEV_UNREGISTER)
1158 return NOTIFY_DONE;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001159 v = &net->ipv4.vif_table[0];
1160 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001161 if (v->dev == dev)
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001162 vif_delete(net, ct, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 }
1164 return NOTIFY_DONE;
1165}
1166
1167
Jianjun Kongc354e122008-11-03 00:28:02 -08001168static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 .notifier_call = ipmr_device_event,
1170};
1171
1172/*
1173 * Encapsulate a packet by attaching a valid IPIP header to it.
1174 * This avoids tunnel drivers and other mess and gives us the speed so
1175 * important for multicast video.
1176 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001177
Al Viro114c7842006-09-27 18:39:29 -07001178static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001180 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001181 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001182
1183 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001184 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001185 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001186 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
1188 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001189 iph->tos = old_iph->tos;
1190 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 iph->frag_off = 0;
1192 iph->daddr = daddr;
1193 iph->saddr = saddr;
1194 iph->protocol = IPPROTO_IPIP;
1195 iph->ihl = 5;
1196 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001197 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 ip_send_check(iph);
1199
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1201 nf_reset(skb);
1202}
1203
1204static inline int ipmr_forward_finish(struct sk_buff *skb)
1205{
1206 struct ip_options * opt = &(IPCB(skb)->opt);
1207
Eric Dumazetadf30902009-06-02 05:19:30 +00001208 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
1210 if (unlikely(opt->optlen))
1211 ip_forward_options(skb);
1212
1213 return dst_output(skb);
1214}
1215
1216/*
1217 * Processing handlers for ipmr_forward
1218 */
1219
1220static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1221{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001222 struct net *net = mfc_net(c);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001223 const struct iphdr *iph = ip_hdr(skb);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001224 struct vif_device *vif = &net->ipv4.vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 struct net_device *dev;
1226 struct rtable *rt;
1227 int encap = 0;
1228
1229 if (vif->dev == NULL)
1230 goto out_free;
1231
1232#ifdef CONFIG_IP_PIMSM
1233 if (vif->flags & VIFF_REGISTER) {
1234 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001235 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001236 vif->dev->stats.tx_bytes += skb->len;
1237 vif->dev->stats.tx_packets++;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001238 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001239 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 }
1241#endif
1242
1243 if (vif->flags&VIFF_TUNNEL) {
1244 struct flowi fl = { .oif = vif->link,
1245 .nl_u = { .ip4_u =
1246 { .daddr = vif->remote,
1247 .saddr = vif->local,
1248 .tos = RT_TOS(iph->tos) } },
1249 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001250 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 goto out_free;
1252 encap = sizeof(struct iphdr);
1253 } else {
1254 struct flowi fl = { .oif = vif->link,
1255 .nl_u = { .ip4_u =
1256 { .daddr = iph->daddr,
1257 .tos = RT_TOS(iph->tos) } },
1258 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001259 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 goto out_free;
1261 }
1262
1263 dev = rt->u.dst.dev;
1264
1265 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1266 /* Do not fragment multicasts. Alas, IPv4 does not
1267 allow to send ICMP, so that packets will disappear
1268 to blackhole.
1269 */
1270
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001271 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 ip_rt_put(rt);
1273 goto out_free;
1274 }
1275
1276 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1277
1278 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001279 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 goto out_free;
1281 }
1282
1283 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001284 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
Eric Dumazetadf30902009-06-02 05:19:30 +00001286 skb_dst_drop(skb);
1287 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001288 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
1290 /* FIXME: forward and output firewalls used to be called here.
1291 * What do we do with netfilter? -- RR */
1292 if (vif->flags & VIFF_TUNNEL) {
1293 ip_encap(skb, vif->local, vif->remote);
1294 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001295 vif->dev->stats.tx_packets++;
1296 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 }
1298
1299 IPCB(skb)->flags |= IPSKB_FORWARDED;
1300
1301 /*
1302 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1303 * not only before forwarding, but after forwarding on all output
1304 * interfaces. It is clear, if mrouter runs a multicasting
1305 * program, it should receive packets not depending to what interface
1306 * program is joined.
1307 * If we will not make it, the program will have to join on all
1308 * interfaces. On the other hand, multihoming host (or router, but
1309 * not mrouter) cannot join to more than one interface - it will
1310 * result in receiving multiple packets.
1311 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001312 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 ipmr_forward_finish);
1314 return;
1315
1316out_free:
1317 kfree_skb(skb);
1318 return;
1319}
1320
1321static int ipmr_find_vif(struct net_device *dev)
1322{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001323 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 int ct;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001325 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1326 if (net->ipv4.vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 break;
1328 }
1329 return ct;
1330}
1331
1332/* "local" means that we should preserve one skb (for local delivery) */
1333
1334static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1335{
1336 int psend = -1;
1337 int vif, ct;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001338 struct net *net = mfc_net(cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
1340 vif = cache->mfc_parent;
1341 cache->mfc_un.res.pkt++;
1342 cache->mfc_un.res.bytes += skb->len;
1343
1344 /*
1345 * Wrong interface: drop packet and (maybe) send PIM assert.
1346 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001347 if (net->ipv4.vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 int true_vifi;
1349
Eric Dumazet511c3f92009-06-02 05:14:27 +00001350 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 /* It is our own packet, looped back.
1352 Very complicated situation...
1353
1354 The best workaround until routing daemons will be
1355 fixed is not to redistribute packet, if it was
1356 send through wrong interface. It means, that
1357 multicast applications WILL NOT work for
1358 (S,G), which have default multicast route pointing
1359 to wrong oif. In any case, it is not a good
1360 idea to use multicasting applications on router.
1361 */
1362 goto dont_forward;
1363 }
1364
1365 cache->mfc_un.res.wrong_if++;
1366 true_vifi = ipmr_find_vif(skb->dev);
1367
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001368 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 /* pimsm uses asserts, when switching from RPT to SPT,
1370 so that we cannot check that packet arrived on an oif.
1371 It is bad, but otherwise we would need to move pretty
1372 large chunk of pimd to kernel. Ough... --ANK
1373 */
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001374 (net->ipv4.mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001375 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001376 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1378 cache->mfc_un.res.last_assert = jiffies;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001379 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381 goto dont_forward;
1382 }
1383
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001384 net->ipv4.vif_table[vif].pkt_in++;
1385 net->ipv4.vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
1387 /*
1388 * Forward the frame
1389 */
1390 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001391 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 if (psend != -1) {
1393 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1394 if (skb2)
1395 ipmr_queue_xmit(skb2, cache, psend);
1396 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001397 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 }
1399 }
1400 if (psend != -1) {
1401 if (local) {
1402 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1403 if (skb2)
1404 ipmr_queue_xmit(skb2, cache, psend);
1405 } else {
1406 ipmr_queue_xmit(skb, cache, psend);
1407 return 0;
1408 }
1409 }
1410
1411dont_forward:
1412 if (!local)
1413 kfree_skb(skb);
1414 return 0;
1415}
1416
1417
1418/*
1419 * Multicast packets for forwarding arrive here
1420 */
1421
1422int ip_mr_input(struct sk_buff *skb)
1423{
1424 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001425 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001426 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 /* Packet is looped back after forward, it should not be
1429 forwarded second time, but still can be delivered locally.
1430 */
1431 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1432 goto dont_forward;
1433
1434 if (!local) {
1435 if (IPCB(skb)->opt.router_alert) {
1436 if (ip_call_ra_chain(skb))
1437 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001438 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 /* IGMPv1 (and broken IGMPv2 implementations sort of
1440 Cisco IOS <= 11.2(8)) do not put router alert
1441 option to IGMP packets destined to routable
1442 groups. It is very bad, because it means
1443 that we can forward NO IGMP messages.
1444 */
1445 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001446 if (net->ipv4.mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001447 nf_reset(skb);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001448 raw_rcv(net->ipv4.mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 read_unlock(&mrt_lock);
1450 return 0;
1451 }
1452 read_unlock(&mrt_lock);
1453 }
1454 }
1455
1456 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001457 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458
1459 /*
1460 * No usable cache entry
1461 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001462 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 int vif;
1464
1465 if (local) {
1466 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1467 ip_local_deliver(skb);
1468 if (skb2 == NULL) {
1469 read_unlock(&mrt_lock);
1470 return -ENOBUFS;
1471 }
1472 skb = skb2;
1473 }
1474
1475 vif = ipmr_find_vif(skb->dev);
1476 if (vif >= 0) {
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001477 int err = ipmr_cache_unresolved(net, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 read_unlock(&mrt_lock);
1479
1480 return err;
1481 }
1482 read_unlock(&mrt_lock);
1483 kfree_skb(skb);
1484 return -ENODEV;
1485 }
1486
1487 ip_mr_forward(skb, cache, local);
1488
1489 read_unlock(&mrt_lock);
1490
1491 if (local)
1492 return ip_local_deliver(skb);
1493
1494 return 0;
1495
1496dont_forward:
1497 if (local)
1498 return ip_local_deliver(skb);
1499 kfree_skb(skb);
1500 return 0;
1501}
1502
#ifdef CONFIG_IP_PIMSM
/*
 * Common part of PIM register reception.
 *
 * @skb:    received packet; the encapsulated IP header is expected
 *          @pimlen bytes past the transport header
 * @pimlen: length of the PIM header preceding the inner IP header
 *
 * Strips the outer headers and re-injects the inner packet through
 * netif_rx() as if it had arrived on the register vif's device.
 *
 * Returns 0 when the skb was handed to netif_rx(), 1 when it was not
 * accepted (the skb is left untouched in that case).
 */
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);

	/*
	 * Check that:
	 *  a. packet is really destined to a multicast group
	 *  b. packet is not a NULL-REGISTER
	 *  c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr))
		return 1;
	if (encap->tot_len == 0)
		return 1;
	if (ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device while under the lock. */
	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	/* Make the inner IP header the new network header. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);

	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->pkt_type = PACKET_HOST;
	skb->ip_summed = 0;
	skb_dst_drop(skb);

	reg_dev->stats.rx_packets++;
	reg_dev->stats.rx_bytes += skb->len;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
1549
1550#ifdef CONFIG_IP_PIMSM_V1
1551/*
1552 * Handle IGMP messages of PIMv1
1553 */
1554
1555int pim_rcv_v1(struct sk_buff * skb)
1556{
1557 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001558 struct net *net = dev_net(skb->dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001559
1560 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1561 goto drop;
1562
1563 pim = igmp_hdr(skb);
1564
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001565 if (!net->ipv4.mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001566 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1567 goto drop;
1568
1569 if (__pim_rcv(skb, sizeof(*pim))) {
1570drop:
1571 kfree_skb(skb);
1572 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 return 0;
1574}
1575#endif
1576
1577#ifdef CONFIG_IP_PIMSM_V2
1578static int pim_rcv(struct sk_buff * skb)
1579{
1580 struct pimreghdr *pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001582 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 goto drop;
1584
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001585 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001586 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001588 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001589 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 goto drop;
1591
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001592 if (__pim_rcv(skb, sizeof(*pim))) {
1593drop:
1594 kfree_skb(skb);
1595 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 return 0;
1597}
1598#endif
1599
1600static int
1601ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1602{
1603 int ct;
1604 struct rtnexthop *nhp;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001605 struct net *net = mfc_net(c);
1606 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001607 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 struct rtattr *mp_head;
1609
1610 if (dev)
1611 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1612
Jianjun Kongc354e122008-11-03 00:28:02 -08001613 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614
1615 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1616 if (c->mfc_un.res.ttls[ct] < 255) {
1617 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1618 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001619 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 nhp->rtnh_flags = 0;
1621 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001622 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 nhp->rtnh_len = sizeof(*nhp);
1624 }
1625 }
1626 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001627 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 rtm->rtm_type = RTN_MULTICAST;
1629 return 1;
1630
1631rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001632 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 return -EMSGSIZE;
1634}
1635
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001636int ipmr_get_route(struct net *net,
1637 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638{
1639 int err;
1640 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001641 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642
1643 read_lock(&mrt_lock);
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001644 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
Jianjun Kongc354e122008-11-03 00:28:02 -08001646 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001647 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001648 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 struct net_device *dev;
1650 int vif;
1651
1652 if (nowait) {
1653 read_unlock(&mrt_lock);
1654 return -EAGAIN;
1655 }
1656
1657 dev = skb->dev;
1658 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1659 read_unlock(&mrt_lock);
1660 return -ENODEV;
1661 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001662 skb2 = skb_clone(skb, GFP_ATOMIC);
1663 if (!skb2) {
1664 read_unlock(&mrt_lock);
1665 return -ENOMEM;
1666 }
1667
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001668 skb_push(skb2, sizeof(struct iphdr));
1669 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001670 iph = ip_hdr(skb2);
1671 iph->ihl = sizeof(struct iphdr) >> 2;
1672 iph->saddr = rt->rt_src;
1673 iph->daddr = rt->rt_dst;
1674 iph->version = 0;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001675 err = ipmr_cache_unresolved(net, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 read_unlock(&mrt_lock);
1677 return err;
1678 }
1679
1680 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1681 cache->mfc_flags |= MFC_NOTIFY;
1682 err = ipmr_fill_mroute(skb, cache, rtm);
1683 read_unlock(&mrt_lock);
1684 return err;
1685}
1686
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001687#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688/*
1689 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1690 */
1691struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001692 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 int ct;
1694};
1695
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001696static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1697 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 loff_t pos)
1699{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001700 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1701 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001703 if (pos-- == 0)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001704 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 }
1706 return NULL;
1707}
1708
1709static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001710 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001712 struct net *net = seq_file_net(seq);
1713
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001715 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 : SEQ_START_TOKEN;
1717}
1718
1719static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1720{
1721 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001722 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723
1724 ++*pos;
1725 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001726 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001727
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001728 while (++iter->ct < net->ipv4.maxvif) {
1729 if (!VIF_EXISTS(net, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 continue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001731 return &net->ipv4.vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 }
1733 return NULL;
1734}
1735
1736static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001737 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738{
1739 read_unlock(&mrt_lock);
1740}
1741
/*
 * seq_file ->show() for the vif table.
 *
 * Prints the column header for SEQ_START_TOKEN; for any other element,
 * prints one line for the vif_device passed in @v: its index in
 * net->ipv4.vif_table, device name ("none" when no device is attached),
 * byte/packet counters, flags and local/remote addresses.
 *
 * Always returns 0.
 */
1742static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1743{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001744 struct net *net = seq_file_net(seq);
1745
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001747 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1749 } else {
1750 const struct vif_device *vif = v;
1751 const char *name = vif->dev ? vif->dev->name : "none";
1752
/* The first column is the vif's index, derived by pointer difference. */
1753 seq_printf(seq,
1754 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001755 vif - net->ipv4.vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001756 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 vif->bytes_out, vif->pkt_out,
1758 vif->flags, vif->local, vif->remote);
1759 }
1760 return 0;
1761}
1762
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001763static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 .start = ipmr_vif_seq_start,
1765 .next = ipmr_vif_seq_next,
1766 .stop = ipmr_vif_seq_stop,
1767 .show = ipmr_vif_seq_show,
1768};
1769
1770static int ipmr_vif_open(struct inode *inode, struct file *file)
1771{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001772 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1773 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774}
1775
Arjan van de Ven9a321442007-02-12 00:55:35 -08001776static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 .owner = THIS_MODULE,
1778 .open = ipmr_vif_open,
1779 .read = seq_read,
1780 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001781 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782};
1783
1784struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001785 struct seq_net_private p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786 struct mfc_cache **cache;
1787 int ct;
1788};
1789
1790
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001791static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1792 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793{
1794 struct mfc_cache *mfc;
1795
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001796 it->cache = net->ipv4.mfc_cache_array;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 read_lock(&mrt_lock);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001798 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001799 for (mfc = net->ipv4.mfc_cache_array[it->ct];
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001800 mfc; mfc = mfc->next)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001801 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 return mfc;
1803 read_unlock(&mrt_lock);
1804
1805 it->cache = &mfc_unres_queue;
1806 spin_lock_bh(&mfc_unres_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001807 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001808 if (net_eq(mfc_net(mfc), net) &&
1809 pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 return mfc;
1811 spin_unlock_bh(&mfc_unres_lock);
1812
1813 it->cache = NULL;
1814 return NULL;
1815}
1816
1817
1818static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1819{
1820 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001821 struct net *net = seq_file_net(seq);
1822
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 it->cache = NULL;
1824 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001825 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 : SEQ_START_TOKEN;
1827}
1828
1829static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1830{
1831 struct mfc_cache *mfc = v;
1832 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001833 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834
1835 ++*pos;
1836
1837 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001838 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839
1840 if (mfc->next)
1841 return mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001842
1843 if (it->cache == &mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844 goto end_of_list;
1845
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001846 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847
1848 while (++it->ct < MFC_LINES) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001849 mfc = net->ipv4.mfc_cache_array[it->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 if (mfc)
1851 return mfc;
1852 }
1853
1854 /* exhausted cache_array, show unresolved */
1855 read_unlock(&mrt_lock);
1856 it->cache = &mfc_unres_queue;
1857 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001858
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 spin_lock_bh(&mfc_unres_lock);
1860 mfc = mfc_unres_queue;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001861 while (mfc && !net_eq(mfc_net(mfc), net))
1862 mfc = mfc->next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001863 if (mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 return mfc;
1865
1866 end_of_list:
1867 spin_unlock_bh(&mfc_unres_lock);
1868 it->cache = NULL;
1869
1870 return NULL;
1871}
1872
1873static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1874{
1875 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001876 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877
1878 if (it->cache == &mfc_unres_queue)
1879 spin_unlock_bh(&mfc_unres_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001880 else if (it->cache == net->ipv4.mfc_cache_array)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 read_unlock(&mrt_lock);
1882}
1883
/*
 * seq_file ->show() for the multicast forwarding cache.
 *
 * Prints the column header for SEQ_START_TOKEN; for any other element,
 * prints one line for the mfc_cache passed in @v: group, origin and parent
 * vif, followed by the packet/byte/wrong-interface counters and the list of
 * "vif:ttl" output interfaces.  Entries taken from the unresolved queue get
 * zeroed counters and no output interface list.
 *
 * Always returns 0.
 */
1884static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1885{
1886 int n;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001887 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888
1889 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001890 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1892 } else {
1893 const struct mfc_cache *mfc = v;
1894 const struct ipmr_mfc_iter *it = seq->private;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001895
Benjamin Thery999890b2008-12-03 22:22:16 -08001896 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 (unsigned long) mfc->mfc_mcastgrp,
1898 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001899 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900
/* it->cache tells which list this entry came from. */
1901 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001902 seq_printf(seq, " %8lu %8lu %8lu",
1903 mfc->mfc_un.res.pkt,
1904 mfc->mfc_un.res.bytes,
1905 mfc->mfc_un.res.wrong_if);
/* List only vifs that still exist and have a TTL threshold below 255. */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001906 for (n = mfc->mfc_un.res.minvif;
1907 n < mfc->mfc_un.res.maxvif; n++ ) {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001908 if (VIF_EXISTS(net, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00001909 mfc->mfc_un.res.ttls[n] < 255)
1910 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001911 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 n, mfc->mfc_un.res.ttls[n]);
1913 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08001914 } else {
1915 /* unresolved mfc_caches don't contain
1916 * pkt, bytes and wrong_if values
1917 */
1918 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 }
1920 seq_putc(seq, '\n');
1921 }
1922 return 0;
1923}
1924
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001925static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926 .start = ipmr_mfc_seq_start,
1927 .next = ipmr_mfc_seq_next,
1928 .stop = ipmr_mfc_seq_stop,
1929 .show = ipmr_mfc_seq_show,
1930};
1931
1932static int ipmr_mfc_open(struct inode *inode, struct file *file)
1933{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001934 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1935 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936}
1937
Arjan van de Ven9a321442007-02-12 00:55:35 -08001938static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 .owner = THIS_MODULE,
1940 .open = ipmr_mfc_open,
1941 .read = seq_read,
1942 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001943 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001945#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946
1947#ifdef CONFIG_IP_PIMSM_V2
1948static struct net_protocol pim_protocol = {
1949 .handler = pim_rcv,
Tom Goff403dbb92009-06-14 03:16:13 -07001950 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951};
1952#endif
1953
1954
1955/*
1956 * Setup for IP multicast routing
1957 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00001958static int __net_init ipmr_net_init(struct net *net)
1959{
1960 int err = 0;
1961
1962 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1963 GFP_KERNEL);
1964 if (!net->ipv4.vif_table) {
1965 err = -ENOMEM;
1966 goto fail;
1967 }
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001968
1969 /* Forwarding cache */
1970 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1971 sizeof(struct mfc_cache *),
1972 GFP_KERNEL);
1973 if (!net->ipv4.mfc_cache_array) {
1974 err = -ENOMEM;
1975 goto fail_mfc_cache;
1976 }
Benjamin Thery6c5143d2009-01-22 04:56:21 +00001977
1978#ifdef CONFIG_IP_PIMSM
1979 net->ipv4.mroute_reg_vif_num = -1;
1980#endif
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001981
1982#ifdef CONFIG_PROC_FS
1983 err = -ENOMEM;
1984 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1985 goto proc_vif_fail;
1986 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1987 goto proc_cache_fail;
1988#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001989 return 0;
1990
Benjamin Theryf6bb4512009-01-22 04:56:22 +00001991#ifdef CONFIG_PROC_FS
1992proc_cache_fail:
1993 proc_net_remove(net, "ip_mr_vif");
1994proc_vif_fail:
1995 kfree(net->ipv4.mfc_cache_array);
1996#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00001997fail_mfc_cache:
1998 kfree(net->ipv4.vif_table);
Benjamin Therycf958ae32009-01-22 04:56:16 +00001999fail:
2000 return err;
2001}
2002
2003static void __net_exit ipmr_net_exit(struct net *net)
2004{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002005#ifdef CONFIG_PROC_FS
2006 proc_net_remove(net, "ip_mr_cache");
2007 proc_net_remove(net, "ip_mr_vif");
2008#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002009 kfree(net->ipv4.mfc_cache_array);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002010 kfree(net->ipv4.vif_table);
2011}
2012
2013static struct pernet_operations ipmr_net_ops = {
2014 .init = ipmr_net_init,
2015 .exit = ipmr_net_exit,
2016};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002017
Wang Chen03d2f892008-07-03 12:13:36 +08002018int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019{
Wang Chen03d2f892008-07-03 12:13:36 +08002020 int err;
2021
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2023 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002024 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002025 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002026 if (!mrt_cachep)
2027 return -ENOMEM;
2028
Benjamin Therycf958ae32009-01-22 04:56:16 +00002029 err = register_pernet_subsys(&ipmr_net_ops);
2030 if (err)
2031 goto reg_pernet_fail;
2032
Pavel Emelyanovb24b8a22008-01-23 21:20:07 -08002033 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen03d2f892008-07-03 12:13:36 +08002034 err = register_netdevice_notifier(&ip_mr_notifier);
2035 if (err)
2036 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002037#ifdef CONFIG_IP_PIMSM_V2
2038 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2039 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2040 err = -EAGAIN;
2041 goto add_proto_fail;
2042 }
2043#endif
Wang Chen03d2f892008-07-03 12:13:36 +08002044 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002045
Tom Goff403dbb92009-06-14 03:16:13 -07002046#ifdef CONFIG_IP_PIMSM_V2
2047add_proto_fail:
2048 unregister_netdevice_notifier(&ip_mr_notifier);
2049#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002050reg_notif_fail:
2051 del_timer(&ipmr_expire_timer);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002052 unregister_pernet_subsys(&ipmr_net_ops);
2053reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002054 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002055 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056}