blob: 856123fe32f98c97e3364a322fe9899d4b8d1359 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
Gilles Espinassef77f13e2010-03-29 15:41:47 +020025 * Relax this requirement to work with older peers.
Linus Torvalds1da177e2005-04-16 15:20:36 -070026 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020051#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020055#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070065#include <net/netlink.h>
Patrick McHardyf0ad0862010-04-13 05:03:23 +000066#include <net/fib_rules.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
71
Patrick McHardy0c122952010-04-13 05:03:22 +000072struct mr_table {
Patrick McHardyf0ad0862010-04-13 05:03:23 +000073 struct list_head list;
Patrick McHardy8de53df2010-04-15 13:29:28 +020074#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +000077 u32 id;
Patrick McHardy0c122952010-04-13 05:03:22 +000078 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
Patrick McHardyf0ad0862010-04-13 05:03:23 +000092struct ipmr_rule {
93 struct fib_rule common;
94};
95
/* Outcome of a rule lookup: the table chosen by ipmr_rule_action() */
struct ipmr_result {
	struct mr_table		*mrt;
};
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
Patrick McHardy0c122952010-04-13 05:03:22 +0000110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   As a result the data path is entirely free of exclusive locks.
 */
122
Christoph Lametere18b8902006-12-06 20:33:20 -0800123static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
Patrick McHardy0c122952010-04-13 05:03:22 +0000126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000130 struct sk_buff *pkt, vifi_t vifi, int assert);
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000133static void ipmr_expire_process(unsigned long arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
/* Match callback of the ipmr rules: unconditionally reports a match. */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
/* Configure callback of the ipmr rules: does nothing and returns 0. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
203
/* Compare callback of the ipmr rules: unconditionally returns 1. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
Patrick McHardy3d0c9c42010-04-26 16:02:04 +0200219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
Patrick McHardy25239ce2010-04-26 16:02:05 +0200220 .family = RTNL_FAMILY_IPMR,
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
306
307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
Patrick McHardy8de53df2010-04-15 13:29:28 +0200314 write_pnet(&mrt->net, net);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
336
Wang Chend6070322008-07-14 20:55:26 -0700337static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000339 struct net *net = dev_net(dev);
340
Wang Chend6070322008-07-14 20:55:26 -0700341 dev_close(dev);
342
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000343 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700344 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800345 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700346 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700347 struct ip_tunnel_parm p;
348
349 memset(&p, 0, sizeof(p));
350 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 p.iph.saddr = v->vifc_lcl_addr.s_addr;
352 p.iph.version = 4;
353 p.iph.ihl = 5;
354 p.iph.protocol = IPPROTO_IPIP;
355 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800358 if (ops->ndo_do_ioctl) {
359 mm_segment_t oldfs = get_fs();
360
361 set_fs(KERNEL_DS);
362 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363 set_fs(oldfs);
364 }
Wang Chend6070322008-07-14 20:55:26 -0700365 }
366}
367
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000369struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370{
371 struct net_device *dev;
372
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000373 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
375 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800376 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 int err;
378 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 struct ip_tunnel_parm p;
380 struct in_device *in_dev;
381
382 memset(&p, 0, sizeof(p));
383 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 p.iph.saddr = v->vifc_lcl_addr.s_addr;
385 p.iph.version = 4;
386 p.iph.ihl = 5;
387 p.iph.protocol = IPPROTO_IPIP;
388 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800389 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800391 if (ops->ndo_do_ioctl) {
392 mm_segment_t oldfs = get_fs();
393
394 set_fs(KERNEL_DS);
395 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396 set_fs(oldfs);
397 } else
398 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
400 dev = NULL;
401
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000402 if (err == 0 &&
403 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 dev->flags |= IFF_MULTICAST;
405
Herbert Xue5ed6392005-10-03 14:35:55 -0700406 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700407 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700409
410 ipv4_devconf_setall(in_dev);
411 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412
413 if (dev_open(dev))
414 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700415 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 }
417 }
418 return dev;
419
420failure:
421 /* allow the register to be completed before unregistering. */
422 rtnl_unlock();
423 rtnl_lock();
424
425 unregister_netdevice(dev);
426 return NULL;
427}
428
429#ifdef CONFIG_IP_PIMSM
430
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000433 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700447 dev->stats.tx_bytes += skb->len;
448 dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +0000449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 read_unlock(&mrt_lock);
451 kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000452 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453}
454
Stephen Hemminger007c3832008-11-20 20:28:35 -0800455static const struct net_device_ops reg_vif_netdev_ops = {
456 .ndo_start_xmit = reg_vif_xmit,
457};
458
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459static void reg_vif_setup(struct net_device *dev)
460{
461 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800462 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800464 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700466 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467}
468
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470{
471 struct net_device *dev;
472 struct in_device *in_dev;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000473 char name[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
482 if (dev == NULL)
483 return NULL;
484
Tom Goff403dbb92009-06-14 03:16:13 -0700485 dev_net_set(dev, net);
486
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 if (register_netdevice(dev)) {
488 free_netdev(dev);
489 return NULL;
490 }
491 dev->iflink = 0;
492
Herbert Xu71e27da2007-06-04 23:36:06 -0700493 rcu_read_lock();
494 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700497 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Herbert Xu71e27da2007-06-04 23:36:06 -0700499 ipv4_devconf_setall(in_dev);
500 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
503 if (dev_open(dev))
504 goto failure;
505
Wang Chen7dc00c82008-07-14 20:56:34 -0700506 dev_hold(dev);
507
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 return dev;
509
510failure:
511 /* allow the register to be completed before unregistering. */
512 rtnl_unlock();
513 rtnl_lock();
514
515 unregister_netdevice(dev);
516 return NULL;
517}
518#endif
519
520/*
521 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700522 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900524
Patrick McHardy0c122952010-04-13 05:03:22 +0000525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000526 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527{
528 struct vif_device *v;
529 struct net_device *dev;
530 struct in_device *in_dev;
531
Patrick McHardy0c122952010-04-13 05:03:22 +0000532 if (vifi < 0 || vifi >= mrt->maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return -EADDRNOTAVAIL;
534
Patrick McHardy0c122952010-04-13 05:03:22 +0000535 v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
537 write_lock_bh(&mrt_lock);
538 dev = v->dev;
539 v->dev = NULL;
540
541 if (!dev) {
542 write_unlock_bh(&mrt_lock);
543 return -EADDRNOTAVAIL;
544 }
545
546#ifdef CONFIG_IP_PIMSM
Patrick McHardy0c122952010-04-13 05:03:22 +0000547 if (vifi == mrt->mroute_reg_vif_num)
548 mrt->mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549#endif
550
Patrick McHardy0c122952010-04-13 05:03:22 +0000551 if (vifi+1 == mrt->maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 int tmp;
553 for (tmp=vifi-1; tmp>=0; tmp--) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000554 if (VIF_EXISTS(mrt, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 break;
556 }
Patrick McHardy0c122952010-04-13 05:03:22 +0000557 mrt->maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 }
559
560 write_unlock_bh(&mrt_lock);
561
562 dev_set_allmulti(dev, -1);
563
Herbert Xue5ed6392005-10-03 14:35:55 -0700564 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700565 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 ip_rt_multicast_event(in_dev);
567 }
568
Wang Chen7dc00c82008-07-14 20:56:34 -0700569 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000570 unregister_netdevice_queue(dev, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
572 dev_put(dev);
573 return 0;
574}
575
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000576static inline void ipmr_cache_free(struct mfc_cache *c)
577{
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000578 kmem_cache_free(mrt_cachep, c);
579}
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581/* Destroy an unresolved cache entry, killing queued skbs
582 and reporting error to netlink readers.
583 */
584
Patrick McHardy0c122952010-04-13 05:03:22 +0000585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586{
Patrick McHardy8de53df2010-04-15 13:29:28 +0200587 struct net *net = read_pnet(&mrt->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700589 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
Patrick McHardy0c122952010-04-13 05:03:22 +0000591 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Jianjun Kongc354e122008-11-03 00:28:02 -0800593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700594 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 nlh->nlmsg_type = NLMSG_ERROR;
597 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700599 e = NLMSG_DATA(nlh);
600 e->error = -ETIMEDOUT;
601 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700602
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000603 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 } else
605 kfree_skb(skb);
606 }
607
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000608 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
611
Patrick McHardye258beb2010-04-13 05:03:19 +0000612/* Timer process for the unresolved queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
Patrick McHardye258beb2010-04-13 05:03:19 +0000614static void ipmr_expire_process(unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Patrick McHardy0c122952010-04-13 05:03:22 +0000616 struct mr_table *mrt = (struct mr_table *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 unsigned long now;
618 unsigned long expires;
Patrick McHardy862465f2010-04-13 05:03:21 +0000619 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
621 if (!spin_trylock(&mfc_unres_lock)) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 return;
624 }
625
Patrick McHardy0c122952010-04-13 05:03:22 +0000626 if (list_empty(&mrt->mfc_unres_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 goto out;
628
629 now = jiffies;
630 expires = 10*HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
Patrick McHardy0c122952010-04-13 05:03:22 +0000632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 if (time_after(c->mfc_un.unres.expires, now)) {
634 unsigned long interval = c->mfc_un.unres.expires - now;
635 if (interval < expires)
636 expires = interval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 continue;
638 }
639
Patrick McHardy862465f2010-04-13 05:03:21 +0000640 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +0000641 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 }
643
Patrick McHardy0c122952010-04-13 05:03:22 +0000644 if (!list_empty(&mrt->mfc_unres_queue))
645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
647out:
648 spin_unlock(&mfc_unres_lock);
649}
650
651/* Fill oifs list. It is called under write locked mrt_lock. */
652
Patrick McHardy0c122952010-04-13 05:03:22 +0000653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000654 unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655{
656 int vifi;
657
658 cache->mfc_un.res.minvif = MAXVIFS;
659 cache->mfc_un.res.maxvif = 0;
660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
Patrick McHardy0c122952010-04-13 05:03:22 +0000662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 if (VIF_EXISTS(mrt, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000664 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 if (cache->mfc_un.res.minvif > vifi)
667 cache->mfc_un.res.minvif = vifi;
668 if (cache->mfc_un.res.maxvif <= vifi)
669 cache->mfc_un.res.maxvif = vifi + 1;
670 }
671 }
672}
673
Patrick McHardy0c122952010-04-13 05:03:22 +0000674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676{
677 int vifi = vifc->vifc_vifi;
Patrick McHardy0c122952010-04-13 05:03:22 +0000678 struct vif_device *v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 struct net_device *dev;
680 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700681 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 /* Is vif busy ? */
Patrick McHardy0c122952010-04-13 05:03:22 +0000684 if (VIF_EXISTS(mrt, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 return -EADDRINUSE;
686
687 switch (vifc->vifc_flags) {
688#ifdef CONFIG_IP_PIMSM
689 case VIFF_REGISTER:
690 /*
691 * Special Purpose VIF in PIM
692 * All the packets will be sent to the daemon
693 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000694 if (mrt->mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 return -EADDRINUSE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000696 dev = ipmr_reg_vif(net, mrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if (!dev)
698 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700699 err = dev_set_allmulti(dev, 1);
700 if (err) {
701 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700702 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700703 return err;
704 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 break;
706#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900707 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000708 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 if (!dev)
710 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700711 err = dev_set_allmulti(dev, 1);
712 if (err) {
713 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700714 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700715 return err;
716 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 break;
Ilia Kee5e81f2009-09-16 05:53:07 +0000718
719 case VIFF_USE_IFINDEX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 case 0:
Ilia Kee5e81f2009-09-16 05:53:07 +0000721 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 if (dev && dev->ip_ptr == NULL) {
724 dev_put(dev);
725 return -EADDRNOTAVAIL;
726 }
727 } else
728 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 if (!dev)
731 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700732 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700733 if (err) {
734 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700735 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700736 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 break;
738 default:
739 return -EINVAL;
740 }
741
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000742 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743 dev_put(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 return -EADDRNOTAVAIL;
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000745 }
Herbert Xu42f811b2007-06-04 23:34:44 -0700746 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 ip_rt_multicast_event(in_dev);
748
749 /*
750 * Fill in the VIF structures
751 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800752 v->rate_limit = vifc->vifc_rate_limit;
753 v->local = vifc->vifc_lcl_addr.s_addr;
754 v->remote = vifc->vifc_rmt_addr.s_addr;
755 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (!mrtsock)
757 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800758 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 v->bytes_in = 0;
760 v->bytes_out = 0;
761 v->pkt_in = 0;
762 v->pkt_out = 0;
763 v->link = dev->ifindex;
764 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 v->link = dev->iflink;
766
767 /* And finish update writing critical data */
768 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800769 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770#ifdef CONFIG_IP_PIMSM
771 if (v->flags&VIFF_REGISTER)
Patrick McHardy0c122952010-04-13 05:03:22 +0000772 mrt->mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773#endif
Patrick McHardy0c122952010-04-13 05:03:22 +0000774 if (vifi+1 > mrt->maxvif)
775 mrt->maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 write_unlock_bh(&mrt_lock);
777 return 0;
778}
779
Patrick McHardy0c122952010-04-13 05:03:22 +0000780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000781 __be32 origin,
782 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783{
Jianjun Kongc354e122008-11-03 00:28:02 -0800784 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 struct mfc_cache *c;
786
Patrick McHardy0c122952010-04-13 05:03:22 +0000787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +0000788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789 return c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 }
Patrick McHardy862465f2010-04-13 05:03:21 +0000791 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792}
793
794/*
795 * Allocate a multicast cache entry
796 */
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000797static struct mfc_cache *ipmr_cache_alloc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798{
Jianjun Kongc354e122008-11-03 00:28:02 -0800799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 c->mfc_un.res.minvif = MAXVIFS;
803 return c;
804}
805
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000806static struct mfc_cache *ipmr_cache_alloc_unres(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807{
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 c->mfc_un.unres.expires = jiffies + 10*HZ;
813 return c;
814}
815
816/*
817 * A cache entry has gone into a resolved state from queued
818 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900819
Patrick McHardy0c122952010-04-13 05:03:22 +0000820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822{
823 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700824 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826 /*
827 * Play the pending entries through our router
828 */
829
Jianjun Kongc354e122008-11-03 00:28:02 -0800830 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700831 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200834 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
836 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 } else {
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700841 e = NLMSG_DATA(nlh);
842 e->error = -EMSGSIZE;
843 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 }
Thomas Graf2942e902006-08-15 00:30:25 -0700845
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 } else
Patrick McHardy0c122952010-04-13 05:03:22 +0000848 ip_mr_forward(net, mrt, skb, c, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 }
850}
851
852/*
853 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854 * expects the following bizarre scheme.
855 *
856 * Called under mrt_lock.
857 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900858
Patrick McHardy0c122952010-04-13 05:03:22 +0000859static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000860 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861{
862 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300863 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 struct igmphdr *igmp;
865 struct igmpmsg *msg;
866 int ret;
867
868#ifdef CONFIG_IP_PIMSM
869 if (assert == IGMPMSG_WHOLEPKT)
870 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871 else
872#endif
873 skb = alloc_skb(128, GFP_ATOMIC);
874
Stephen Hemminger132adf52007-03-08 20:44:43 -0800875 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 return -ENOBUFS;
877
878#ifdef CONFIG_IP_PIMSM
879 if (assert == IGMPMSG_WHOLEPKT) {
880 /* Ugly, but we have no choice with this interface.
881 Duplicate old header, fix ihl, length etc.
882 And all this only to mangle msg->im_msgtype and
883 to set msg->im_mbz to "mbz" :-)
884 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300885 skb_push(skb, sizeof(struct iphdr));
886 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300887 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300888 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
891 msg->im_mbz = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +0000892 msg->im_vif = mrt->mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900896 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900898 {
899
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 /*
901 * Copy the IP header
902 */
903
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700904 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300905 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300906 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700907 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000910 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 /*
913 * Add our header
914 */
915
Jianjun Kongc354e122008-11-03 00:28:02 -0800916 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 igmp->type =
918 msg->im_msgtype = assert;
919 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700920 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700921 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900922 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Patrick McHardy0c122952010-04-13 05:03:22 +0000924 if (mrt->mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 kfree_skb(skb);
926 return -EINVAL;
927 }
928
929 /*
930 * Deliver to mrouted
931 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000933 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 if (net_ratelimit())
935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936 kfree_skb(skb);
937 }
938
939 return ret;
940}
941
942/*
943 * Queue a packet for resolution. It gets locked cache entry!
944 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946static int
Patrick McHardy0c122952010-04-13 05:03:22 +0000947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948{
Patrick McHardy862465f2010-04-13 05:03:21 +0000949 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 int err;
951 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700952 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
954 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +0000955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +0000956 if (c->mfc_mcastgrp == iph->daddr &&
Patrick McHardy862465f2010-04-13 05:03:21 +0000957 c->mfc_origin == iph->saddr) {
958 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 break;
Patrick McHardy862465f2010-04-13 05:03:21 +0000960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 }
962
Patrick McHardy862465f2010-04-13 05:03:21 +0000963 if (!found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 /*
965 * Create a new entry if allowable
966 */
967
Patrick McHardy0c122952010-04-13 05:03:22 +0000968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000969 (c = ipmr_cache_alloc_unres()) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 spin_unlock_bh(&mfc_unres_lock);
971
972 kfree_skb(skb);
973 return -ENOBUFS;
974 }
975
976 /*
977 * Fill in the new cache entry
978 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700979 c->mfc_parent = -1;
980 c->mfc_origin = iph->saddr;
981 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982
983 /*
984 * Reflect first query at mrouted.
985 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000987 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900988 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 out - Brad Parker
990 */
991 spin_unlock_bh(&mfc_unres_lock);
992
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000993 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 kfree_skb(skb);
995 return err;
996 }
997
Patrick McHardy0c122952010-04-13 05:03:22 +0000998 atomic_inc(&mrt->cache_resolve_queue_len);
999 list_add(&c->list, &mrt->mfc_unres_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
David S. Miller278554b2010-05-12 00:05:35 -07001001 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1002 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 }
1004
1005 /*
1006 * See if we can append the packet
1007 */
1008 if (c->mfc_un.unres.unresolved.qlen>3) {
1009 kfree_skb(skb);
1010 err = -ENOBUFS;
1011 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -08001012 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 err = 0;
1014 }
1015
1016 spin_unlock_bh(&mfc_unres_lock);
1017 return err;
1018}
1019
1020/*
1021 * MFC cache manipulation by user space mroute daemon
1022 */
1023
Patrick McHardy0c122952010-04-13 05:03:22 +00001024static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025{
1026 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001027 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028
Jianjun Kongc354e122008-11-03 00:28:02 -08001029 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
Patrick McHardy0c122952010-04-13 05:03:22 +00001031 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1033 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1034 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001035 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 write_unlock_bh(&mrt_lock);
1037
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001038 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 return 0;
1040 }
1041 }
1042 return -ENOENT;
1043}
1044
Patrick McHardy0c122952010-04-13 05:03:22 +00001045static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1046 struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047{
Patrick McHardy862465f2010-04-13 05:03:21 +00001048 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001050 struct mfc_cache *uc, *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Patrick McHardya50436f22010-03-17 06:04:14 +00001052 if (mfc->mfcc_parent >= MAXVIFS)
1053 return -ENFILE;
1054
Jianjun Kongc354e122008-11-03 00:28:02 -08001055 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
Patrick McHardy0c122952010-04-13 05:03:22 +00001057 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
Patrick McHardy862465f2010-04-13 05:03:21 +00001059 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1060 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 break;
Patrick McHardy862465f2010-04-13 05:03:21 +00001062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 }
1064
Patrick McHardy862465f2010-04-13 05:03:21 +00001065 if (found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 write_lock_bh(&mrt_lock);
1067 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001068 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 if (!mrtsock)
1070 c->mfc_flags |= MFC_STATIC;
1071 write_unlock_bh(&mrt_lock);
1072 return 0;
1073 }
1074
Joe Perchesf97c1e02007-12-16 13:45:43 -08001075 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 return -EINVAL;
1077
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001078 c = ipmr_cache_alloc();
Jianjun Kongc354e122008-11-03 00:28:02 -08001079 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 return -ENOMEM;
1081
Jianjun Kongc354e122008-11-03 00:28:02 -08001082 c->mfc_origin = mfc->mfcc_origin.s_addr;
1083 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1084 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001085 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 if (!mrtsock)
1087 c->mfc_flags |= MFC_STATIC;
1088
1089 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001090 list_add(&c->list, &mrt->mfc_cache_array[line]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 write_unlock_bh(&mrt_lock);
1092
1093 /*
1094 * Check to see if we resolved a queued list. If so we
1095 * need to send on the frames and tidy up.
1096 */
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001097 found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001099 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +00001100 if (uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001102 list_del(&uc->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001103 atomic_dec(&mrt->cache_resolve_queue_len);
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001104 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 break;
1106 }
1107 }
Patrick McHardy0c122952010-04-13 05:03:22 +00001108 if (list_empty(&mrt->mfc_unres_queue))
1109 del_timer(&mrt->ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 spin_unlock_bh(&mfc_unres_lock);
1111
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001112 if (found) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001113 ipmr_cache_resolve(net, mrt, uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001114 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 }
1116 return 0;
1117}
1118
1119/*
1120 * Close the multicast socket, and clear the vif tables etc
1121 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001122
Patrick McHardy0c122952010-04-13 05:03:22 +00001123static void mroute_clean_tables(struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124{
1125 int i;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001126 LIST_HEAD(list);
Patrick McHardy862465f2010-04-13 05:03:21 +00001127 struct mfc_cache *c, *next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001128
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 /*
1130 * Shut down all active vif entries
1131 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001132 for (i = 0; i < mrt->maxvif; i++) {
1133 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1134 vif_delete(mrt, i, 0, &list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001136 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137
1138 /*
1139 * Wipe the cache
1140 */
Patrick McHardy862465f2010-04-13 05:03:21 +00001141 for (i = 0; i < MFC_LINES; i++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001142 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001143 if (c->mfc_flags&MFC_STATIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001146 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 write_unlock_bh(&mrt_lock);
1148
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001149 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 }
1151 }
1152
Patrick McHardy0c122952010-04-13 05:03:22 +00001153 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001155 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001156 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001157 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 }
1159 spin_unlock_bh(&mfc_unres_lock);
1160 }
1161}
1162
1163static void mrtsock_destruct(struct sock *sk)
1164{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001165 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001166 struct mr_table *mrt;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 rtnl_lock();
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001169 ipmr_for_each_table(mrt, net) {
1170 if (sk == mrt->mroute_sk) {
1171 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001173 write_lock_bh(&mrt_lock);
1174 mrt->mroute_sk = NULL;
1175 write_unlock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001177 mroute_clean_tables(mrt);
1178 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 }
1180 rtnl_unlock();
1181}
1182
1183/*
1184 * Socket options and virtual interface manipulation. The whole
1185 * virtual interface system is a complete heap, but unfortunately
1186 * that's how BSD mrouted happens to think. Maybe one day with a proper
1187 * MOSPF/PIM router set up we can clean this up.
1188 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001189
David S. Millerb7058842009-09-30 16:12:20 -07001190int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191{
1192 int ret;
1193 struct vifctl vif;
1194 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001195 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001196 struct mr_table *mrt;
1197
1198 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1199 if (mrt == NULL)
1200 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001201
Stephen Hemminger132adf52007-03-08 20:44:43 -08001202 if (optname != MRT_INIT) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001203 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 return -EACCES;
1205 }
1206
Stephen Hemminger132adf52007-03-08 20:44:43 -08001207 switch (optname) {
1208 case MRT_INIT:
1209 if (sk->sk_type != SOCK_RAW ||
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001210 inet_sk(sk)->inet_num != IPPROTO_IGMP)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001211 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -08001212 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001213 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214
Stephen Hemminger132adf52007-03-08 20:44:43 -08001215 rtnl_lock();
Patrick McHardy0c122952010-04-13 05:03:22 +00001216 if (mrt->mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -08001218 return -EADDRINUSE;
1219 }
1220
1221 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1222 if (ret == 0) {
1223 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001224 mrt->mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001225 write_unlock_bh(&mrt_lock);
1226
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001227 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001228 }
1229 rtnl_unlock();
1230 return ret;
1231 case MRT_DONE:
Patrick McHardy0c122952010-04-13 05:03:22 +00001232 if (sk != mrt->mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001233 return -EACCES;
1234 return ip_ra_control(sk, 0, NULL);
1235 case MRT_ADD_VIF:
1236 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -08001237 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001238 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001239 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001240 return -EFAULT;
1241 if (vif.vifc_vifi >= MAXVIFS)
1242 return -ENFILE;
1243 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001244 if (optname == MRT_ADD_VIF) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001245 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001246 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001248 }
1249 rtnl_unlock();
1250 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251
1252 /*
1253 * Manipulate the forwarding caches. These live
1254 * in a sort of kernel/user symbiosis.
1255 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001256 case MRT_ADD_MFC:
1257 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001258 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001259 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001260 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001261 return -EFAULT;
1262 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001263 if (optname == MRT_DEL_MFC)
Patrick McHardy0c122952010-04-13 05:03:22 +00001264 ret = ipmr_mfc_delete(mrt, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001265 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001266 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001267 rtnl_unlock();
1268 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 /*
1270 * Control PIM assert.
1271 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001272 case MRT_ASSERT:
1273 {
1274 int v;
1275 if (get_user(v,(int __user *)optval))
1276 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001277 mrt->mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001278 return 0;
1279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001281 case MRT_PIM:
1282 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001283 int v;
1284
Stephen Hemminger132adf52007-03-08 20:44:43 -08001285 if (get_user(v,(int __user *)optval))
1286 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001287 v = (v) ? 1 : 0;
1288
Stephen Hemminger132adf52007-03-08 20:44:43 -08001289 rtnl_lock();
1290 ret = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001291 if (v != mrt->mroute_do_pim) {
1292 mrt->mroute_do_pim = v;
1293 mrt->mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001295 rtnl_unlock();
1296 return ret;
1297 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001299#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1300 case MRT_TABLE:
1301 {
1302 u32 v;
1303
1304 if (optlen != sizeof(u32))
1305 return -EINVAL;
1306 if (get_user(v, (u32 __user *)optval))
1307 return -EFAULT;
1308 if (sk == mrt->mroute_sk)
1309 return -EBUSY;
1310
1311 rtnl_lock();
1312 ret = 0;
1313 if (!ipmr_new_table(net, v))
1314 ret = -ENOMEM;
1315 raw_sk(sk)->ipmr_table = v;
1316 rtnl_unlock();
1317 return ret;
1318 }
1319#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001320 /*
1321 * Spurious command, or MRT_VERSION which you cannot
1322 * set.
1323 */
1324 default:
1325 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 }
1327}
1328
1329/*
1330 * Getsock opt support for the multicast routing system.
1331 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001332
Jianjun Kongc354e122008-11-03 00:28:02 -08001333int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334{
1335 int olr;
1336 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001337 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001338 struct mr_table *mrt;
1339
1340 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1341 if (mrt == NULL)
1342 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
Jianjun Kongc354e122008-11-03 00:28:02 -08001344 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345#ifdef CONFIG_IP_PIMSM
1346 optname!=MRT_PIM &&
1347#endif
1348 optname!=MRT_ASSERT)
1349 return -ENOPROTOOPT;
1350
1351 if (get_user(olr, optlen))
1352 return -EFAULT;
1353
1354 olr = min_t(unsigned int, olr, sizeof(int));
1355 if (olr < 0)
1356 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001357
Jianjun Kongc354e122008-11-03 00:28:02 -08001358 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001360 if (optname == MRT_VERSION)
1361 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001363 else if (optname == MRT_PIM)
Patrick McHardy0c122952010-04-13 05:03:22 +00001364 val = mrt->mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365#endif
1366 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001367 val = mrt->mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001368 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 return -EFAULT;
1370 return 0;
1371}
1372
1373/*
1374 * The IP multicast ioctl support routines.
1375 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1378{
1379 struct sioc_sg_req sr;
1380 struct sioc_vif_req vr;
1381 struct vif_device *vif;
1382 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001383 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001384 struct mr_table *mrt;
1385
1386 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1387 if (mrt == NULL)
1388 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001389
Stephen Hemminger132adf52007-03-08 20:44:43 -08001390 switch (cmd) {
1391 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001392 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001393 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001394 if (vr.vifi >= mrt->maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001395 return -EINVAL;
1396 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001397 vif = &mrt->vif_table[vr.vifi];
1398 if (VIF_EXISTS(mrt, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001399 vr.icount = vif->pkt_in;
1400 vr.ocount = vif->pkt_out;
1401 vr.ibytes = vif->bytes_in;
1402 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001404
Jianjun Kongc354e122008-11-03 00:28:02 -08001405 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001407 return 0;
1408 }
1409 read_unlock(&mrt_lock);
1410 return -EADDRNOTAVAIL;
1411 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001412 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001413 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
Stephen Hemminger132adf52007-03-08 20:44:43 -08001415 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001416 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001417 if (c) {
1418 sr.pktcnt = c->mfc_un.res.pkt;
1419 sr.bytecnt = c->mfc_un.res.bytes;
1420 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001422
Jianjun Kongc354e122008-11-03 00:28:02 -08001423 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001424 return -EFAULT;
1425 return 0;
1426 }
1427 read_unlock(&mrt_lock);
1428 return -EADDRNOTAVAIL;
1429 default:
1430 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 }
1432}
1433
1434
1435static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1436{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001437 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001438 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001439 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 struct vif_device *v;
1441 int ct;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001442 LIST_HEAD(list);
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001443
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 if (event != NETDEV_UNREGISTER)
1445 return NOTIFY_DONE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001446
1447 ipmr_for_each_table(mrt, net) {
1448 v = &mrt->vif_table[0];
1449 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1450 if (v->dev == dev)
1451 vif_delete(mrt, ct, 1, &list);
1452 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001454 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 return NOTIFY_DONE;
1456}
1457
1458
/*
 * Notifier block whose callback is ipmr_device_event().
 * NOTE(review): presumably registered with the netdevice notifier chain
 * during module init -- the registration is outside this section, confirm.
 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001459static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 .notifier_call = ipmr_device_event,
1461};
1462
1463/*
1464 * Encapsulate a packet by attaching a valid IPIP header to it.
1465 * This avoids tunnel drivers and other mess and gives us the speed so
1466 * important for multicast video.
1467 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001468
Al Viro114c7842006-09-27 18:39:29 -07001469static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001471 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001472 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001473
1474 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001475 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001476 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001477 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478
1479 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001480 iph->tos = old_iph->tos;
1481 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 iph->frag_off = 0;
1483 iph->daddr = daddr;
1484 iph->saddr = saddr;
1485 iph->protocol = IPPROTO_IPIP;
1486 iph->ihl = 5;
1487 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001488 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 ip_send_check(iph);
1490
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1492 nf_reset(skb);
1493}
1494
1495static inline int ipmr_forward_finish(struct sk_buff *skb)
1496{
1497 struct ip_options * opt = &(IPCB(skb)->opt);
1498
Eric Dumazetadf30902009-06-02 05:19:30 +00001499 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500
1501 if (unlikely(opt->optlen))
1502 ip_forward_options(skb);
1503
1504 return dst_output(skb);
1505}
1506
1507/*
1508 * Processing handlers for ipmr_forward
1509 */
1510
Patrick McHardy0c122952010-04-13 05:03:22 +00001511static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1512 struct sk_buff *skb, struct mfc_cache *c, int vifi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001514 const struct iphdr *iph = ip_hdr(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001515 struct vif_device *vif = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 struct net_device *dev;
1517 struct rtable *rt;
1518 int encap = 0;
1519
1520 if (vif->dev == NULL)
1521 goto out_free;
1522
1523#ifdef CONFIG_IP_PIMSM
1524 if (vif->flags & VIFF_REGISTER) {
1525 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001526 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001527 vif->dev->stats.tx_bytes += skb->len;
1528 vif->dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001529 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001530 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 }
1532#endif
1533
1534 if (vif->flags&VIFF_TUNNEL) {
1535 struct flowi fl = { .oif = vif->link,
1536 .nl_u = { .ip4_u =
1537 { .daddr = vif->remote,
1538 .saddr = vif->local,
1539 .tos = RT_TOS(iph->tos) } },
1540 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001541 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 goto out_free;
1543 encap = sizeof(struct iphdr);
1544 } else {
1545 struct flowi fl = { .oif = vif->link,
1546 .nl_u = { .ip4_u =
1547 { .daddr = iph->daddr,
1548 .tos = RT_TOS(iph->tos) } },
1549 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001550 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 goto out_free;
1552 }
1553
1554 dev = rt->u.dst.dev;
1555
1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1557 /* Do not fragment multicasts. Alas, IPv4 does not
1558 allow to send ICMP, so that packets will disappear
1559 to blackhole.
1560 */
1561
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001562 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 ip_rt_put(rt);
1564 goto out_free;
1565 }
1566
1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1568
1569 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001570 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 goto out_free;
1572 }
1573
1574 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001575 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
Eric Dumazetadf30902009-06-02 05:19:30 +00001577 skb_dst_drop(skb);
1578 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001579 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
1581 /* FIXME: forward and output firewalls used to be called here.
1582 * What do we do with netfilter? -- RR */
1583 if (vif->flags & VIFF_TUNNEL) {
1584 ip_encap(skb, vif->local, vif->remote);
1585 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001586 vif->dev->stats.tx_packets++;
1587 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 }
1589
1590 IPCB(skb)->flags |= IPSKB_FORWARDED;
1591
1592 /*
1593 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1594 * not only before forwarding, but after forwarding on all output
1595 * interfaces. It is clear, if mrouter runs a multicasting
1596 * program, it should receive packets not depending to what interface
1597 * program is joined.
1598 * If we will not make it, the program will have to join on all
1599 * interfaces. On the other hand, multihoming host (or router, but
1600 * not mrouter) cannot join to more than one interface - it will
1601 * result in receiving multiple packets.
1602 */
Jan Engelhardt9bbc7682010-03-23 04:07:29 +01001603 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 ipmr_forward_finish);
1605 return;
1606
1607out_free:
1608 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609}
1610
Patrick McHardy0c122952010-04-13 05:03:22 +00001611static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612{
1613 int ct;
Patrick McHardy0c122952010-04-13 05:03:22 +00001614
1615 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616 if (mrt->vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 break;
1618 }
1619 return ct;
1620}
1621
1622/* "local" means that we should preserve one skb (for local delivery) */
1623
Patrick McHardy0c122952010-04-13 05:03:22 +00001624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627{
1628 int psend = -1;
1629 int vif, ct;
1630
1631 vif = cache->mfc_parent;
1632 cache->mfc_un.res.pkt++;
1633 cache->mfc_un.res.bytes += skb->len;
1634
1635 /*
1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1637 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001638 if (mrt->vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 int true_vifi;
1640
Eric Dumazet511c3f92009-06-02 05:14:27 +00001641 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 /* It is our own packet, looped back.
1643 Very complicated situation...
1644
1645 The best workaround until routing daemons will be
1646 fixed is not to redistribute packet, if it was
1647 send through wrong interface. It means, that
1648 multicast applications WILL NOT work for
1649 (S,G), which have default multicast route pointing
1650 to wrong oif. In any case, it is not a good
1651 idea to use multicasting applications on router.
1652 */
1653 goto dont_forward;
1654 }
1655
1656 cache->mfc_un.res.wrong_if++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001657 true_vifi = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Patrick McHardy0c122952010-04-13 05:03:22 +00001659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 /* pimsm uses asserts, when switching from RPT to SPT,
1661 so that we cannot check that packet arrived on an oif.
1662 It is bad, but otherwise we would need to move pretty
1663 large chunk of pimd to kernel. Ough... --ANK
1664 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001665 (mrt->mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001667 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1669 cache->mfc_un.res.last_assert = jiffies;
Patrick McHardy0c122952010-04-13 05:03:22 +00001670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 }
1672 goto dont_forward;
1673 }
1674
Patrick McHardy0c122952010-04-13 05:03:22 +00001675 mrt->vif_table[vif].pkt_in++;
1676 mrt->vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677
1678 /*
1679 * Forward the frame
1680 */
1681 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001682 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 if (psend != -1) {
1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001689 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 }
1691 }
1692 if (psend != -1) {
1693 if (local) {
1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 return 0;
1700 }
1701 }
1702
1703dont_forward:
1704 if (!local)
1705 kfree_skb(skb);
1706 return 0;
1707}
1708
1709
1710/*
1711 * Multicast packets for forwarding arrive here
1712 */
1713
1714int ip_mr_input(struct sk_buff *skb)
1715{
1716 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001717 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001718 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001719 struct mr_table *mrt;
1720 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
1722 /* Packet is looped back after forward, it should not be
1723 forwarded second time, but still can be delivered locally.
1724 */
1725 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1726 goto dont_forward;
1727
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0)
1730 return err;
1731
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 if (!local) {
1733 if (IPCB(skb)->opt.router_alert) {
1734 if (ip_call_ra_chain(skb))
1735 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001736 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 /* IGMPv1 (and broken IGMPv2 implementations sort of
1738 Cisco IOS <= 11.2(8)) do not put router alert
1739 option to IGMP packets destined to routable
1740 groups. It is very bad, because it means
1741 that we can forward NO IGMP messages.
1742 */
1743 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001744 if (mrt->mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001745 nf_reset(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001746 raw_rcv(mrt->mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 read_unlock(&mrt_lock);
1748 return 0;
1749 }
1750 read_unlock(&mrt_lock);
1751 }
1752 }
1753
1754 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001755 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756
1757 /*
1758 * No usable cache entry
1759 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001760 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 int vif;
1762
1763 if (local) {
1764 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1765 ip_local_deliver(skb);
1766 if (skb2 == NULL) {
1767 read_unlock(&mrt_lock);
1768 return -ENOBUFS;
1769 }
1770 skb = skb2;
1771 }
1772
Patrick McHardy0c122952010-04-13 05:03:22 +00001773 vif = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 if (vif >= 0) {
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001775 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 read_unlock(&mrt_lock);
1777
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001778 return err2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 }
1780 read_unlock(&mrt_lock);
1781 kfree_skb(skb);
1782 return -ENODEV;
1783 }
1784
Patrick McHardy0c122952010-04-13 05:03:22 +00001785 ip_mr_forward(net, mrt, skb, cache, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786
1787 read_unlock(&mrt_lock);
1788
1789 if (local)
1790 return ip_local_deliver(skb);
1791
1792 return 0;
1793
1794dont_forward:
1795 if (local)
1796 return ip_local_deliver(skb);
1797 kfree_skb(skb);
1798 return 0;
1799}
1800
#ifdef CONFIG_IP_PIMSM
/*
 *	Common part of PIMv1/PIMv2 REGISTER reception: strip the outer
 *	headers and feed the encapsulated packet back into the stack as if
 *	it had been received on the register vif of @mrt.
 *
 *	@mrt:	 multicast routing table to take the register vif from
 *	@skb:	 received REGISTER message
 *	@pimlen: length of the PIM header in front of the inner IP header
 *
 *	Returns 0 when the skb was passed to netif_rx(), 1 when the packet
 *	was rejected; in that case the skb is untouched and the caller has
 *	to free it.
 */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* Take a reference on the register device under the lock so it
	 * stays valid after the lock is dropped.
	 */
	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	/* Make the inner IP header the network header of the skb. */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
1845
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * Only PIMv1 REGISTER messages are accepted, and only while PIM is
 * enabled on the table selected for the packet; everything else is freed.
 * The skb is always consumed and the return value is always 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	/* Need the PIMv1 (IGMP) header plus the inner IP header. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() returns non-zero when it did not take the skb. */
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
1876
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2: accepts REGISTER messages that are not
 * NULL-REGISTERs and pass the checksum test, and hands them to
 * __pim_rcv().  The skb is always consumed and the return value is
 * always 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	/* Need the PIM register header plus the inner IP header. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	/* The checksum is accepted if it is valid either over the PIM
	 * header alone or over the whole packet; the packet is dropped
	 * only when both checks fail.
	 */
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	/* __pim_rcv() returns non-zero when it did not take the skb. */
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
1904
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001905static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1906 struct mfc_cache *c, struct rtmsg *rtm)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907{
1908 int ct;
1909 struct rtnexthop *nhp;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001910 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 struct rtattr *mp_head;
1912
Nicolas Dichtel74381892010-03-25 23:45:35 +00001913 /* If cache is unresolved, don't try to parse IIF and OIF */
Dan Carpentered0f160a2010-05-26 00:38:56 -07001914 if (c->mfc_parent >= MAXVIFS)
Nicolas Dichtel74381892010-03-25 23:45:35 +00001915 return -ENOENT;
1916
Patrick McHardy0c122952010-04-13 05:03:22 +00001917 if (VIF_EXISTS(mrt, c->mfc_parent))
1918 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
Jianjun Kongc354e122008-11-03 00:28:02 -08001920 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921
1922 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001923 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1925 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001926 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 nhp->rtnh_flags = 0;
1928 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Patrick McHardy0c122952010-04-13 05:03:22 +00001929 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 nhp->rtnh_len = sizeof(*nhp);
1931 }
1932 }
1933 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001934 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 rtm->rtm_type = RTN_MULTICAST;
1936 return 1;
1937
1938rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001939 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 return -EMSGSIZE;
1941}
1942
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001943int ipmr_get_route(struct net *net,
1944 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
1946 int err;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001947 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001949 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001951 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1952 if (mrt == NULL)
1953 return -ENOENT;
1954
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001956 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957
Jianjun Kongc354e122008-11-03 00:28:02 -08001958 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001959 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001960 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 struct net_device *dev;
1962 int vif;
1963
1964 if (nowait) {
1965 read_unlock(&mrt_lock);
1966 return -EAGAIN;
1967 }
1968
1969 dev = skb->dev;
Patrick McHardy0c122952010-04-13 05:03:22 +00001970 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 read_unlock(&mrt_lock);
1972 return -ENODEV;
1973 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001974 skb2 = skb_clone(skb, GFP_ATOMIC);
1975 if (!skb2) {
1976 read_unlock(&mrt_lock);
1977 return -ENOMEM;
1978 }
1979
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001980 skb_push(skb2, sizeof(struct iphdr));
1981 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001982 iph = ip_hdr(skb2);
1983 iph->ihl = sizeof(struct iphdr) >> 2;
1984 iph->saddr = rt->rt_src;
1985 iph->daddr = rt->rt_dst;
1986 iph->version = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001987 err = ipmr_cache_unresolved(mrt, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 read_unlock(&mrt_lock);
1989 return err;
1990 }
1991
1992 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1993 cache->mfc_flags |= MFC_NOTIFY;
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001994 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995 read_unlock(&mrt_lock);
1996 return err;
1997}
1998
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001999static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2000 u32 pid, u32 seq, struct mfc_cache *c)
2001{
2002 struct nlmsghdr *nlh;
2003 struct rtmsg *rtm;
2004
2005 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2006 if (nlh == NULL)
2007 return -EMSGSIZE;
2008
2009 rtm = nlmsg_data(nlh);
2010 rtm->rtm_family = RTNL_FAMILY_IPMR;
2011 rtm->rtm_dst_len = 32;
2012 rtm->rtm_src_len = 32;
2013 rtm->rtm_tos = 0;
2014 rtm->rtm_table = mrt->id;
2015 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2016 rtm->rtm_type = RTN_MULTICAST;
2017 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2018 rtm->rtm_protocol = RTPROT_UNSPEC;
2019 rtm->rtm_flags = 0;
2020
2021 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2022 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2023
2024 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2025 goto nla_put_failure;
2026
2027 return nlmsg_end(skb, nlh);
2028
2029nla_put_failure:
2030 nlmsg_cancel(skb, nlh);
2031 return -EMSGSIZE;
2032}
2033
2034static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2035{
2036 struct net *net = sock_net(skb->sk);
2037 struct mr_table *mrt;
2038 struct mfc_cache *mfc;
2039 unsigned int t = 0, s_t;
2040 unsigned int h = 0, s_h;
2041 unsigned int e = 0, s_e;
2042
2043 s_t = cb->args[0];
2044 s_h = cb->args[1];
2045 s_e = cb->args[2];
2046
2047 read_lock(&mrt_lock);
2048 ipmr_for_each_table(mrt, net) {
2049 if (t < s_t)
2050 goto next_table;
2051 if (t > s_t)
2052 s_h = 0;
2053 for (h = s_h; h < MFC_LINES; h++) {
2054 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2055 if (e < s_e)
2056 goto next_entry;
2057 if (ipmr_fill_mroute(mrt, skb,
2058 NETLINK_CB(cb->skb).pid,
2059 cb->nlh->nlmsg_seq,
2060 mfc) < 0)
2061 goto done;
2062next_entry:
2063 e++;
2064 }
2065 e = s_e = 0;
2066 }
2067 s_h = 0;
2068next_table:
2069 t++;
2070 }
2071done:
2072 read_unlock(&mrt_lock);
2073
2074 cb->args[2] = e;
2075 cb->args[1] = h;
2076 cb->args[0] = t;
2077
2078 return skb->len;
2079}
2080
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002081#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082/*
2083 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2084 */
2085struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002086 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002087 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 int ct;
2089};
2090
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002091static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2092 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 loff_t pos)
2094{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002095 struct mr_table *mrt = iter->mrt;
Patrick McHardy0c122952010-04-13 05:03:22 +00002096
2097 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2098 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002100 if (pos-- == 0)
Patrick McHardy0c122952010-04-13 05:03:22 +00002101 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 }
2103 return NULL;
2104}
2105
2106static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002107 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002109 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002110 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002111 struct mr_table *mrt;
2112
2113 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2114 if (mrt == NULL)
2115 return ERR_PTR(-ENOENT);
2116
2117 iter->mrt = mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002120 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 : SEQ_START_TOKEN;
2122}
2123
2124static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2125{
2126 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002127 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002128 struct mr_table *mrt = iter->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129
2130 ++*pos;
2131 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002132 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002133
Patrick McHardy0c122952010-04-13 05:03:22 +00002134 while (++iter->ct < mrt->maxvif) {
2135 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136 continue;
Patrick McHardy0c122952010-04-13 05:03:22 +00002137 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 }
2139 return NULL;
2140}
2141
2142static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002143 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144{
2145 read_unlock(&mrt_lock);
2146}
2147
2148static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2149{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002150 struct ipmr_vif_iter *iter = seq->private;
2151 struct mr_table *mrt = iter->mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002152
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002154 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2156 } else {
2157 const struct vif_device *vif = v;
2158 const char *name = vif->dev ? vif->dev->name : "none";
2159
2160 seq_printf(seq,
2161 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Patrick McHardy0c122952010-04-13 05:03:22 +00002162 vif - mrt->vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002163 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 vif->bytes_out, vif->pkt_out,
2165 vif->flags, vif->local, vif->remote);
2166 }
2167 return 0;
2168}
2169
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002170static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 .start = ipmr_vif_seq_start,
2172 .next = ipmr_vif_seq_next,
2173 .stop = ipmr_vif_seq_stop,
2174 .show = ipmr_vif_seq_show,
2175};
2176
2177static int ipmr_vif_open(struct inode *inode, struct file *file)
2178{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002179 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2180 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181}
2182
Arjan van de Ven9a321442007-02-12 00:55:35 -08002183static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 .owner = THIS_MODULE,
2185 .open = ipmr_vif_open,
2186 .read = seq_read,
2187 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002188 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189};
2190
2191struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002192 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002193 struct mr_table *mrt;
Patrick McHardy862465f2010-04-13 05:03:21 +00002194 struct list_head *cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 int ct;
2196};
2197
2198
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002199static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2200 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002202 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 struct mfc_cache *mfc;
2204
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 read_lock(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002206 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002207 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002208 list_for_each_entry(mfc, it->cache, list)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002209 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 return mfc;
Patrick McHardy862465f2010-04-13 05:03:21 +00002211 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 read_unlock(&mrt_lock);
2213
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002215 it->cache = &mrt->mfc_unres_queue;
Patrick McHardy862465f2010-04-13 05:03:21 +00002216 list_for_each_entry(mfc, it->cache, list)
Patrick McHardye258beb2010-04-13 05:03:19 +00002217 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 return mfc;
2219 spin_unlock_bh(&mfc_unres_lock);
2220
2221 it->cache = NULL;
2222 return NULL;
2223}
2224
2225
2226static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2227{
2228 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002229 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002230 struct mr_table *mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002231
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002232 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2233 if (mrt == NULL)
2234 return ERR_PTR(-ENOENT);
2235
2236 it->mrt = mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237 it->cache = NULL;
2238 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002239 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 : SEQ_START_TOKEN;
2241}
2242
2243static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2244{
2245 struct mfc_cache *mfc = v;
2246 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002247 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002248 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249
2250 ++*pos;
2251
2252 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002253 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254
Patrick McHardy862465f2010-04-13 05:03:21 +00002255 if (mfc->list.next != it->cache)
2256 return list_entry(mfc->list.next, struct mfc_cache, list);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002257
Patrick McHardy0c122952010-04-13 05:03:22 +00002258 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 goto end_of_list;
2260
Patrick McHardy0c122952010-04-13 05:03:22 +00002261 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262
2263 while (++it->ct < MFC_LINES) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002264 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002265 if (list_empty(it->cache))
2266 continue;
2267 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 }
2269
2270 /* exhausted cache_array, show unresolved */
2271 read_unlock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002272 it->cache = &mrt->mfc_unres_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002274
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002276 if (!list_empty(it->cache))
2277 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278
2279 end_of_list:
2280 spin_unlock_bh(&mfc_unres_lock);
2281 it->cache = NULL;
2282
2283 return NULL;
2284}
2285
2286static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2287{
2288 struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002289 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290
Patrick McHardy0c122952010-04-13 05:03:22 +00002291 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 spin_unlock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002293 else if (it->cache == &mrt->mfc_cache_array[it->ct])
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 read_unlock(&mrt_lock);
2295}
2296
2297static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2298{
2299 int n;
2300
2301 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002302 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2304 } else {
2305 const struct mfc_cache *mfc = v;
2306 const struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002307 const struct mr_table *mrt = it->mrt;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002308
Eric Dumazet0eae88f2010-04-20 19:06:52 -07002309 seq_printf(seq, "%08X %08X %-3hd",
2310 (__force u32) mfc->mfc_mcastgrp,
2311 (__force u32) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002312 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313
Patrick McHardy0c122952010-04-13 05:03:22 +00002314 if (it->cache != &mrt->mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002315 seq_printf(seq, " %8lu %8lu %8lu",
2316 mfc->mfc_un.res.pkt,
2317 mfc->mfc_un.res.bytes,
2318 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08002319 for (n = mfc->mfc_un.res.minvif;
2320 n < mfc->mfc_un.res.maxvif; n++ ) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002321 if (VIF_EXISTS(mrt, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00002322 mfc->mfc_un.res.ttls[n] < 255)
2323 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002324 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 n, mfc->mfc_un.res.ttls[n]);
2326 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002327 } else {
2328 /* unresolved mfc_caches don't contain
2329 * pkt, bytes and wrong_if values
2330 */
2331 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 }
2333 seq_putc(seq, '\n');
2334 }
2335 return 0;
2336}
2337
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002338static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339 .start = ipmr_mfc_seq_start,
2340 .next = ipmr_mfc_seq_next,
2341 .stop = ipmr_mfc_seq_stop,
2342 .show = ipmr_mfc_seq_show,
2343};
2344
2345static int ipmr_mfc_open(struct inode *inode, struct file *file)
2346{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002347 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2348 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349}
2350
Arjan van de Ven9a321442007-02-12 00:55:35 -08002351static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 .owner = THIS_MODULE,
2353 .open = ipmr_mfc_open,
2354 .read = seq_read,
2355 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002356 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002358#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359
#ifdef CONFIG_IP_PIMSM_V2
/* Protocol handler that ip_mr_init() registers for IPPROTO_PIM. */
static const struct net_protocol pim_protocol = {
	.netns_ok	= 1,
	.handler	= pim_rcv,
};
#endif
2366
2367
2368/*
2369 * Setup for IP multicast routing
2370 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00002371static int __net_init ipmr_net_init(struct net *net)
2372{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002373 int err;
Benjamin Therycf958ae32009-01-22 04:56:16 +00002374
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002375 err = ipmr_rules_init(net);
2376 if (err < 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00002377 goto fail;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002378
2379#ifdef CONFIG_PROC_FS
2380 err = -ENOMEM;
2381 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2382 goto proc_vif_fail;
2383 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2384 goto proc_cache_fail;
2385#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002386 return 0;
2387
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002388#ifdef CONFIG_PROC_FS
2389proc_cache_fail:
2390 proc_net_remove(net, "ip_mr_vif");
2391proc_vif_fail:
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002392 ipmr_rules_exit(net);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002393#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +00002394fail:
2395 return err;
2396}
2397
2398static void __net_exit ipmr_net_exit(struct net *net)
2399{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002400#ifdef CONFIG_PROC_FS
2401 proc_net_remove(net, "ip_mr_cache");
2402 proc_net_remove(net, "ip_mr_vif");
2403#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002404 ipmr_rules_exit(net);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002405}
2406
2407static struct pernet_operations ipmr_net_ops = {
2408 .init = ipmr_net_init,
2409 .exit = ipmr_net_exit,
2410};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002411
Wang Chen03d2f892008-07-03 12:13:36 +08002412int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413{
Wang Chen03d2f892008-07-03 12:13:36 +08002414 int err;
2415
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2417 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002418 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002419 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002420 if (!mrt_cachep)
2421 return -ENOMEM;
2422
Benjamin Therycf958ae32009-01-22 04:56:16 +00002423 err = register_pernet_subsys(&ipmr_net_ops);
2424 if (err)
2425 goto reg_pernet_fail;
2426
Wang Chen03d2f892008-07-03 12:13:36 +08002427 err = register_netdevice_notifier(&ip_mr_notifier);
2428 if (err)
2429 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002430#ifdef CONFIG_IP_PIMSM_V2
2431 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2432 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2433 err = -EAGAIN;
2434 goto add_proto_fail;
2435 }
2436#endif
Patrick McHardycb6a4e42010-04-26 16:02:08 +02002437 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
Wang Chen03d2f892008-07-03 12:13:36 +08002438 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002439
Tom Goff403dbb92009-06-14 03:16:13 -07002440#ifdef CONFIG_IP_PIMSM_V2
2441add_proto_fail:
2442 unregister_netdevice_notifier(&ip_mr_notifier);
2443#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002444reg_notif_fail:
Benjamin Therycf958ae32009-01-22 04:56:16 +00002445 unregister_pernet_subsys(&ipmr_net_ops);
2446reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002447 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002448 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449}