blob: eddfd12f55b8b96e505385b3605db34c016d0fb3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020051#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020055#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070065#include <net/netlink.h>
Patrick McHardyf0ad0862010-04-13 05:03:23 +000066#include <net/fib_rules.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69#define CONFIG_IP_PIMSM 1
70#endif
71
Patrick McHardy0c122952010-04-13 05:03:22 +000072struct mr_table {
Patrick McHardyf0ad0862010-04-13 05:03:23 +000073 struct list_head list;
Patrick McHardy8de53df2010-04-15 13:29:28 +020074#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +000077 u32 id;
Patrick McHardy0c122952010-04-13 05:03:22 +000078 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
Patrick McHardyf0ad0862010-04-13 05:03:23 +000092struct ipmr_rule {
93 struct fib_rule common;
94};
95
/* Output of a rule lookup: the table chosen by ipmr_rule_action(). */
struct ipmr_result {
	struct mr_table		*mrt;
};
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
102 */
103
104static DEFINE_RWLOCK(mrt_lock);
105
106/*
107 * Multicast router control variables
108 */
109
Patrick McHardy0c122952010-04-13 05:03:22 +0000110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock);
114
115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock.
119
120 In this case data path is free of exclusive locks at all.
121 */
122
Christoph Lametere18b8902006-12-06 20:33:20 -0800123static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
Patrick McHardy0c122952010-04-13 05:03:22 +0000126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000130 struct sk_buff *pkt, vifi_t vifi, int assert);
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
/* fib_rules match callback: always reports a match (returns 1). */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
/* fib_rules configure callback: nothing to configure, always returns 0. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
203
/* fib_rules compare callback: always returns 1. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
Patrick McHardy3d0c9c42010-04-26 16:02:04 +0200219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
Patrick McHardy25239ce2010-04-26 16:02:05 +0200220 .family = RTNL_FAMILY_IPMR,
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
306
307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
Patrick McHardy8de53df2010-04-15 13:29:28 +0200314 write_pnet(&mrt->net, net);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
336
Wang Chend6070322008-07-14 20:55:26 -0700337static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000339 struct net *net = dev_net(dev);
340
Wang Chend6070322008-07-14 20:55:26 -0700341 dev_close(dev);
342
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000343 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700344 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800345 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700346 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700347 struct ip_tunnel_parm p;
348
349 memset(&p, 0, sizeof(p));
350 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351 p.iph.saddr = v->vifc_lcl_addr.s_addr;
352 p.iph.version = 4;
353 p.iph.ihl = 5;
354 p.iph.protocol = IPPROTO_IPIP;
355 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800358 if (ops->ndo_do_ioctl) {
359 mm_segment_t oldfs = get_fs();
360
361 set_fs(KERNEL_DS);
362 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363 set_fs(oldfs);
364 }
Wang Chend6070322008-07-14 20:55:26 -0700365 }
366}
367
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000369struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370{
371 struct net_device *dev;
372
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000373 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
375 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800376 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 int err;
378 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 struct ip_tunnel_parm p;
380 struct in_device *in_dev;
381
382 memset(&p, 0, sizeof(p));
383 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384 p.iph.saddr = v->vifc_lcl_addr.s_addr;
385 p.iph.version = 4;
386 p.iph.ihl = 5;
387 p.iph.protocol = IPPROTO_IPIP;
388 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800389 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800391 if (ops->ndo_do_ioctl) {
392 mm_segment_t oldfs = get_fs();
393
394 set_fs(KERNEL_DS);
395 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396 set_fs(oldfs);
397 } else
398 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
400 dev = NULL;
401
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000402 if (err == 0 &&
403 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 dev->flags |= IFF_MULTICAST;
405
Herbert Xue5ed6392005-10-03 14:35:55 -0700406 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700407 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700409
410 ipv4_devconf_setall(in_dev);
411 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412
413 if (dev_open(dev))
414 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700415 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 }
417 }
418 return dev;
419
420failure:
421 /* allow the register to be completed before unregistering. */
422 rtnl_unlock();
423 rtnl_lock();
424
425 unregister_netdevice(dev);
426 return NULL;
427}
428
429#ifdef CONFIG_IP_PIMSM
430
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000433 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000445
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700447 dev->stats.tx_bytes += skb->len;
448 dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +0000449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 read_unlock(&mrt_lock);
451 kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000452 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453}
454
Stephen Hemminger007c3832008-11-20 20:28:35 -0800455static const struct net_device_ops reg_vif_netdev_ops = {
456 .ndo_start_xmit = reg_vif_xmit,
457};
458
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459static void reg_vif_setup(struct net_device *dev)
460{
461 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800462 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800464 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700466 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467}
468
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470{
471 struct net_device *dev;
472 struct in_device *in_dev;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000473 char name[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
482 if (dev == NULL)
483 return NULL;
484
Tom Goff403dbb92009-06-14 03:16:13 -0700485 dev_net_set(dev, net);
486
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 if (register_netdevice(dev)) {
488 free_netdev(dev);
489 return NULL;
490 }
491 dev->iflink = 0;
492
Herbert Xu71e27da2007-06-04 23:36:06 -0700493 rcu_read_lock();
494 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700497 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Herbert Xu71e27da2007-06-04 23:36:06 -0700499 ipv4_devconf_setall(in_dev);
500 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
503 if (dev_open(dev))
504 goto failure;
505
Wang Chen7dc00c82008-07-14 20:56:34 -0700506 dev_hold(dev);
507
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 return dev;
509
510failure:
511 /* allow the register to be completed before unregistering. */
512 rtnl_unlock();
513 rtnl_lock();
514
515 unregister_netdevice(dev);
516 return NULL;
517}
518#endif
519
520/*
521 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700522 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900524
Patrick McHardy0c122952010-04-13 05:03:22 +0000525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000526 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527{
528 struct vif_device *v;
529 struct net_device *dev;
530 struct in_device *in_dev;
531
Patrick McHardy0c122952010-04-13 05:03:22 +0000532 if (vifi < 0 || vifi >= mrt->maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return -EADDRNOTAVAIL;
534
Patrick McHardy0c122952010-04-13 05:03:22 +0000535 v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
537 write_lock_bh(&mrt_lock);
538 dev = v->dev;
539 v->dev = NULL;
540
541 if (!dev) {
542 write_unlock_bh(&mrt_lock);
543 return -EADDRNOTAVAIL;
544 }
545
546#ifdef CONFIG_IP_PIMSM
Patrick McHardy0c122952010-04-13 05:03:22 +0000547 if (vifi == mrt->mroute_reg_vif_num)
548 mrt->mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549#endif
550
Patrick McHardy0c122952010-04-13 05:03:22 +0000551 if (vifi+1 == mrt->maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 int tmp;
553 for (tmp=vifi-1; tmp>=0; tmp--) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000554 if (VIF_EXISTS(mrt, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 break;
556 }
Patrick McHardy0c122952010-04-13 05:03:22 +0000557 mrt->maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 }
559
560 write_unlock_bh(&mrt_lock);
561
562 dev_set_allmulti(dev, -1);
563
Herbert Xue5ed6392005-10-03 14:35:55 -0700564 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700565 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 ip_rt_multicast_event(in_dev);
567 }
568
Wang Chen7dc00c82008-07-14 20:56:34 -0700569 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000570 unregister_netdevice_queue(dev, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
572 dev_put(dev);
573 return 0;
574}
575
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000576static inline void ipmr_cache_free(struct mfc_cache *c)
577{
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000578 kmem_cache_free(mrt_cachep, c);
579}
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581/* Destroy an unresolved cache entry, killing queued skbs
582 and reporting error to netlink readers.
583 */
584
Patrick McHardy0c122952010-04-13 05:03:22 +0000585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586{
Patrick McHardy8de53df2010-04-15 13:29:28 +0200587 struct net *net = read_pnet(&mrt->net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700589 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
Patrick McHardy0c122952010-04-13 05:03:22 +0000591 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Jianjun Kongc354e122008-11-03 00:28:02 -0800593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700594 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596 nlh->nlmsg_type = NLMSG_ERROR;
597 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700599 e = NLMSG_DATA(nlh);
600 e->error = -ETIMEDOUT;
601 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700602
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000603 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 } else
605 kfree_skb(skb);
606 }
607
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000608 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
611
Patrick McHardye258beb2010-04-13 05:03:19 +0000612/* Timer process for the unresolved queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
Patrick McHardye258beb2010-04-13 05:03:19 +0000614static void ipmr_expire_process(unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Patrick McHardy0c122952010-04-13 05:03:22 +0000616 struct mr_table *mrt = (struct mr_table *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 unsigned long now;
618 unsigned long expires;
Patrick McHardy862465f2010-04-13 05:03:21 +0000619 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
621 if (!spin_trylock(&mfc_unres_lock)) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 return;
624 }
625
Patrick McHardy0c122952010-04-13 05:03:22 +0000626 if (list_empty(&mrt->mfc_unres_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 goto out;
628
629 now = jiffies;
630 expires = 10*HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
Patrick McHardy0c122952010-04-13 05:03:22 +0000632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 if (time_after(c->mfc_un.unres.expires, now)) {
634 unsigned long interval = c->mfc_un.unres.expires - now;
635 if (interval < expires)
636 expires = interval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 continue;
638 }
639
Patrick McHardy862465f2010-04-13 05:03:21 +0000640 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +0000641 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 }
643
Patrick McHardy0c122952010-04-13 05:03:22 +0000644 if (!list_empty(&mrt->mfc_unres_queue))
645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
647out:
648 spin_unlock(&mfc_unres_lock);
649}
650
651/* Fill oifs list. It is called under write locked mrt_lock. */
652
Patrick McHardy0c122952010-04-13 05:03:22 +0000653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000654 unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655{
656 int vifi;
657
658 cache->mfc_un.res.minvif = MAXVIFS;
659 cache->mfc_un.res.maxvif = 0;
660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
Patrick McHardy0c122952010-04-13 05:03:22 +0000662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663 if (VIF_EXISTS(mrt, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000664 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666 if (cache->mfc_un.res.minvif > vifi)
667 cache->mfc_un.res.minvif = vifi;
668 if (cache->mfc_un.res.maxvif <= vifi)
669 cache->mfc_un.res.maxvif = vifi + 1;
670 }
671 }
672}
673
Patrick McHardy0c122952010-04-13 05:03:22 +0000674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676{
677 int vifi = vifc->vifc_vifi;
Patrick McHardy0c122952010-04-13 05:03:22 +0000678 struct vif_device *v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 struct net_device *dev;
680 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700681 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 /* Is vif busy ? */
Patrick McHardy0c122952010-04-13 05:03:22 +0000684 if (VIF_EXISTS(mrt, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 return -EADDRINUSE;
686
687 switch (vifc->vifc_flags) {
688#ifdef CONFIG_IP_PIMSM
689 case VIFF_REGISTER:
690 /*
691 * Special Purpose VIF in PIM
692 * All the packets will be sent to the daemon
693 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000694 if (mrt->mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 return -EADDRINUSE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000696 dev = ipmr_reg_vif(net, mrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if (!dev)
698 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700699 err = dev_set_allmulti(dev, 1);
700 if (err) {
701 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700702 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700703 return err;
704 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 break;
706#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900707 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000708 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 if (!dev)
710 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700711 err = dev_set_allmulti(dev, 1);
712 if (err) {
713 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700714 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700715 return err;
716 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 break;
Ilia Kee5e81f2009-09-16 05:53:07 +0000718
719 case VIFF_USE_IFINDEX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 case 0:
Ilia Kee5e81f2009-09-16 05:53:07 +0000721 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723 if (dev && dev->ip_ptr == NULL) {
724 dev_put(dev);
725 return -EADDRNOTAVAIL;
726 }
727 } else
728 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 if (!dev)
731 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700732 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700733 if (err) {
734 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700735 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700736 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 break;
738 default:
739 return -EINVAL;
740 }
741
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000742 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743 dev_put(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 return -EADDRNOTAVAIL;
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000745 }
Herbert Xu42f811b2007-06-04 23:34:44 -0700746 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 ip_rt_multicast_event(in_dev);
748
749 /*
750 * Fill in the VIF structures
751 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800752 v->rate_limit = vifc->vifc_rate_limit;
753 v->local = vifc->vifc_lcl_addr.s_addr;
754 v->remote = vifc->vifc_rmt_addr.s_addr;
755 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (!mrtsock)
757 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800758 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 v->bytes_in = 0;
760 v->bytes_out = 0;
761 v->pkt_in = 0;
762 v->pkt_out = 0;
763 v->link = dev->ifindex;
764 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765 v->link = dev->iflink;
766
767 /* And finish update writing critical data */
768 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800769 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770#ifdef CONFIG_IP_PIMSM
771 if (v->flags&VIFF_REGISTER)
Patrick McHardy0c122952010-04-13 05:03:22 +0000772 mrt->mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773#endif
Patrick McHardy0c122952010-04-13 05:03:22 +0000774 if (vifi+1 > mrt->maxvif)
775 mrt->maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 write_unlock_bh(&mrt_lock);
777 return 0;
778}
779
Patrick McHardy0c122952010-04-13 05:03:22 +0000780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000781 __be32 origin,
782 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783{
Jianjun Kongc354e122008-11-03 00:28:02 -0800784 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 struct mfc_cache *c;
786
Patrick McHardy0c122952010-04-13 05:03:22 +0000787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +0000788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789 return c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 }
Patrick McHardy862465f2010-04-13 05:03:21 +0000791 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792}
793
794/*
795 * Allocate a multicast cache entry
796 */
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000797static struct mfc_cache *ipmr_cache_alloc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798{
Jianjun Kongc354e122008-11-03 00:28:02 -0800799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 c->mfc_un.res.minvif = MAXVIFS;
803 return c;
804}
805
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000806static struct mfc_cache *ipmr_cache_alloc_unres(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807{
Jianjun Kongc354e122008-11-03 00:28:02 -0800808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
812 c->mfc_un.unres.expires = jiffies + 10*HZ;
813 return c;
814}
815
816/*
817 * A cache entry has gone into a resolved state from queued
818 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900819
Patrick McHardy0c122952010-04-13 05:03:22 +0000820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822{
823 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700824 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826 /*
827 * Play the pending entries through our router
828 */
829
Jianjun Kongc354e122008-11-03 00:28:02 -0800830 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700831 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
Patrick McHardycb6a4e42010-04-26 16:02:08 +0200834 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
836 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 } else {
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700841 e = NLMSG_DATA(nlh);
842 e->error = -EMSGSIZE;
843 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 }
Thomas Graf2942e902006-08-15 00:30:25 -0700845
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 } else
Patrick McHardy0c122952010-04-13 05:03:22 +0000848 ip_mr_forward(net, mrt, skb, c, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 }
850}
851
852/*
853 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854 * expects the following bizarre scheme.
855 *
856 * Called under mrt_lock.
857 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900858
Patrick McHardy0c122952010-04-13 05:03:22 +0000859static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000860 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861{
862 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300863 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 struct igmphdr *igmp;
865 struct igmpmsg *msg;
866 int ret;
867
868#ifdef CONFIG_IP_PIMSM
869 if (assert == IGMPMSG_WHOLEPKT)
870 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871 else
872#endif
873 skb = alloc_skb(128, GFP_ATOMIC);
874
Stephen Hemminger132adf52007-03-08 20:44:43 -0800875 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 return -ENOBUFS;
877
878#ifdef CONFIG_IP_PIMSM
879 if (assert == IGMPMSG_WHOLEPKT) {
880 /* Ugly, but we have no choice with this interface.
881 Duplicate old header, fix ihl, length etc.
882 And all this only to mangle msg->im_msgtype and
883 to set msg->im_mbz to "mbz" :-)
884 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300885 skb_push(skb, sizeof(struct iphdr));
886 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300887 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300888 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
891 msg->im_mbz = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +0000892 msg->im_vif = mrt->mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900896 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900898 {
899
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 /*
901 * Copy the IP header
902 */
903
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700904 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300905 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300906 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700907 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
908 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000910 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 /*
913 * Add our header
914 */
915
Jianjun Kongc354e122008-11-03 00:28:02 -0800916 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 igmp->type =
918 msg->im_msgtype = assert;
919 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700920 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700921 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900922 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Patrick McHardy0c122952010-04-13 05:03:22 +0000924 if (mrt->mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 kfree_skb(skb);
926 return -EINVAL;
927 }
928
929 /*
930 * Deliver to mrouted
931 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000933 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 if (net_ratelimit())
935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936 kfree_skb(skb);
937 }
938
939 return ret;
940}
941
942/*
943 * Queue a packet for resolution. It gets locked cache entry!
944 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946static int
Patrick McHardy0c122952010-04-13 05:03:22 +0000947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948{
Patrick McHardy862465f2010-04-13 05:03:21 +0000949 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 int err;
951 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700952 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
954 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +0000955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +0000956 if (c->mfc_mcastgrp == iph->daddr &&
Patrick McHardy862465f2010-04-13 05:03:21 +0000957 c->mfc_origin == iph->saddr) {
958 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 break;
Patrick McHardy862465f2010-04-13 05:03:21 +0000960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 }
962
Patrick McHardy862465f2010-04-13 05:03:21 +0000963 if (!found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 /*
965 * Create a new entry if allowable
966 */
967
Patrick McHardy0c122952010-04-13 05:03:22 +0000968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000969 (c = ipmr_cache_alloc_unres()) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 spin_unlock_bh(&mfc_unres_lock);
971
972 kfree_skb(skb);
973 return -ENOBUFS;
974 }
975
976 /*
977 * Fill in the new cache entry
978 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700979 c->mfc_parent = -1;
980 c->mfc_origin = iph->saddr;
981 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982
983 /*
984 * Reflect first query at mrouted.
985 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000987 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900988 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 out - Brad Parker
990 */
991 spin_unlock_bh(&mfc_unres_lock);
992
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000993 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 kfree_skb(skb);
995 return err;
996 }
997
Patrick McHardy0c122952010-04-13 05:03:22 +0000998 atomic_inc(&mrt->cache_resolve_queue_len);
999 list_add(&c->list, &mrt->mfc_unres_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
Patrick McHardy0c122952010-04-13 05:03:22 +00001001 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 }
1003
1004 /*
1005 * See if we can append the packet
1006 */
1007 if (c->mfc_un.unres.unresolved.qlen>3) {
1008 kfree_skb(skb);
1009 err = -ENOBUFS;
1010 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -08001011 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 err = 0;
1013 }
1014
1015 spin_unlock_bh(&mfc_unres_lock);
1016 return err;
1017}
1018
1019/*
1020 * MFC cache manipulation by user space mroute daemon
1021 */
1022
Patrick McHardy0c122952010-04-13 05:03:22 +00001023static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024{
1025 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001026 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027
Jianjun Kongc354e122008-11-03 00:28:02 -08001028 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
Patrick McHardy0c122952010-04-13 05:03:22 +00001030 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1032 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1033 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001034 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035 write_unlock_bh(&mrt_lock);
1036
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001037 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 return 0;
1039 }
1040 }
1041 return -ENOENT;
1042}
1043
Patrick McHardy0c122952010-04-13 05:03:22 +00001044static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1045 struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046{
Patrick McHardy862465f2010-04-13 05:03:21 +00001047 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001049 struct mfc_cache *uc, *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050
Patrick McHardya50436f22010-03-17 06:04:14 +00001051 if (mfc->mfcc_parent >= MAXVIFS)
1052 return -ENFILE;
1053
Jianjun Kongc354e122008-11-03 00:28:02 -08001054 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055
Patrick McHardy0c122952010-04-13 05:03:22 +00001056 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
Patrick McHardy862465f2010-04-13 05:03:21 +00001058 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1059 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 break;
Patrick McHardy862465f2010-04-13 05:03:21 +00001061 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 }
1063
Patrick McHardy862465f2010-04-13 05:03:21 +00001064 if (found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 write_lock_bh(&mrt_lock);
1066 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001067 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 if (!mrtsock)
1069 c->mfc_flags |= MFC_STATIC;
1070 write_unlock_bh(&mrt_lock);
1071 return 0;
1072 }
1073
Joe Perchesf97c1e02007-12-16 13:45:43 -08001074 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 return -EINVAL;
1076
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001077 c = ipmr_cache_alloc();
Jianjun Kongc354e122008-11-03 00:28:02 -08001078 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 return -ENOMEM;
1080
Jianjun Kongc354e122008-11-03 00:28:02 -08001081 c->mfc_origin = mfc->mfcc_origin.s_addr;
1082 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1083 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001084 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 if (!mrtsock)
1086 c->mfc_flags |= MFC_STATIC;
1087
1088 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001089 list_add(&c->list, &mrt->mfc_cache_array[line]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 write_unlock_bh(&mrt_lock);
1091
1092 /*
1093 * Check to see if we resolved a queued list. If so we
1094 * need to send on the frames and tidy up.
1095 */
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001096 found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001098 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +00001099 if (uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001101 list_del(&uc->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001102 atomic_dec(&mrt->cache_resolve_queue_len);
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001103 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 break;
1105 }
1106 }
Patrick McHardy0c122952010-04-13 05:03:22 +00001107 if (list_empty(&mrt->mfc_unres_queue))
1108 del_timer(&mrt->ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 spin_unlock_bh(&mfc_unres_lock);
1110
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001111 if (found) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001112 ipmr_cache_resolve(net, mrt, uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001113 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 }
1115 return 0;
1116}
1117
1118/*
1119 * Close the multicast socket, and clear the vif tables etc
1120 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001121
Patrick McHardy0c122952010-04-13 05:03:22 +00001122static void mroute_clean_tables(struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123{
1124 int i;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001125 LIST_HEAD(list);
Patrick McHardy862465f2010-04-13 05:03:21 +00001126 struct mfc_cache *c, *next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001127
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 /*
1129 * Shut down all active vif entries
1130 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001131 for (i = 0; i < mrt->maxvif; i++) {
1132 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1133 vif_delete(mrt, i, 0, &list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001135 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136
1137 /*
1138 * Wipe the cache
1139 */
Patrick McHardy862465f2010-04-13 05:03:21 +00001140 for (i = 0; i < MFC_LINES; i++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001141 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001142 if (c->mfc_flags&MFC_STATIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001145 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 write_unlock_bh(&mrt_lock);
1147
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001148 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 }
1150 }
1151
Patrick McHardy0c122952010-04-13 05:03:22 +00001152 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001154 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001155 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001156 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 }
1158 spin_unlock_bh(&mfc_unres_lock);
1159 }
1160}
1161
1162static void mrtsock_destruct(struct sock *sk)
1163{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001164 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001165 struct mr_table *mrt;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001166
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 rtnl_lock();
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001168 ipmr_for_each_table(mrt, net) {
1169 if (sk == mrt->mroute_sk) {
1170 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001172 write_lock_bh(&mrt_lock);
1173 mrt->mroute_sk = NULL;
1174 write_unlock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001176 mroute_clean_tables(mrt);
1177 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 }
1179 rtnl_unlock();
1180}
1181
1182/*
1183 * Socket options and virtual interface manipulation. The whole
1184 * virtual interface system is a complete heap, but unfortunately
1185 * that's how BSD mrouted happens to think. Maybe one day with a proper
1186 * MOSPF/PIM router set up we can clean this up.
1187 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001188
David S. Millerb7058842009-09-30 16:12:20 -07001189int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190{
1191 int ret;
1192 struct vifctl vif;
1193 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001194 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001195 struct mr_table *mrt;
1196
1197 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1198 if (mrt == NULL)
1199 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001200
Stephen Hemminger132adf52007-03-08 20:44:43 -08001201 if (optname != MRT_INIT) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001202 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 return -EACCES;
1204 }
1205
Stephen Hemminger132adf52007-03-08 20:44:43 -08001206 switch (optname) {
1207 case MRT_INIT:
1208 if (sk->sk_type != SOCK_RAW ||
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001209 inet_sk(sk)->inet_num != IPPROTO_IGMP)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001210 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -08001211 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001212 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213
Stephen Hemminger132adf52007-03-08 20:44:43 -08001214 rtnl_lock();
Patrick McHardy0c122952010-04-13 05:03:22 +00001215 if (mrt->mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -08001217 return -EADDRINUSE;
1218 }
1219
1220 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1221 if (ret == 0) {
1222 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001223 mrt->mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001224 write_unlock_bh(&mrt_lock);
1225
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001227 }
1228 rtnl_unlock();
1229 return ret;
1230 case MRT_DONE:
Patrick McHardy0c122952010-04-13 05:03:22 +00001231 if (sk != mrt->mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001232 return -EACCES;
1233 return ip_ra_control(sk, 0, NULL);
1234 case MRT_ADD_VIF:
1235 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -08001236 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001237 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001238 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001239 return -EFAULT;
1240 if (vif.vifc_vifi >= MAXVIFS)
1241 return -ENFILE;
1242 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001243 if (optname == MRT_ADD_VIF) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001244 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001245 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001246 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001247 }
1248 rtnl_unlock();
1249 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
1251 /*
1252 * Manipulate the forwarding caches. These live
1253 * in a sort of kernel/user symbiosis.
1254 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001255 case MRT_ADD_MFC:
1256 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001257 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001258 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001259 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001260 return -EFAULT;
1261 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001262 if (optname == MRT_DEL_MFC)
Patrick McHardy0c122952010-04-13 05:03:22 +00001263 ret = ipmr_mfc_delete(mrt, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001264 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001265 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001266 rtnl_unlock();
1267 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 /*
1269 * Control PIM assert.
1270 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001271 case MRT_ASSERT:
1272 {
1273 int v;
1274 if (get_user(v,(int __user *)optval))
1275 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001276 mrt->mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001277 return 0;
1278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001280 case MRT_PIM:
1281 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001282 int v;
1283
Stephen Hemminger132adf52007-03-08 20:44:43 -08001284 if (get_user(v,(int __user *)optval))
1285 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001286 v = (v) ? 1 : 0;
1287
Stephen Hemminger132adf52007-03-08 20:44:43 -08001288 rtnl_lock();
1289 ret = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001290 if (v != mrt->mroute_do_pim) {
1291 mrt->mroute_do_pim = v;
1292 mrt->mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001294 rtnl_unlock();
1295 return ret;
1296 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001298#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1299 case MRT_TABLE:
1300 {
1301 u32 v;
1302
1303 if (optlen != sizeof(u32))
1304 return -EINVAL;
1305 if (get_user(v, (u32 __user *)optval))
1306 return -EFAULT;
1307 if (sk == mrt->mroute_sk)
1308 return -EBUSY;
1309
1310 rtnl_lock();
1311 ret = 0;
1312 if (!ipmr_new_table(net, v))
1313 ret = -ENOMEM;
1314 raw_sk(sk)->ipmr_table = v;
1315 rtnl_unlock();
1316 return ret;
1317 }
1318#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001319 /*
1320 * Spurious command, or MRT_VERSION which you cannot
1321 * set.
1322 */
1323 default:
1324 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 }
1326}
1327
1328/*
1329 * Getsock opt support for the multicast routing system.
1330 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001331
Jianjun Kongc354e122008-11-03 00:28:02 -08001332int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333{
1334 int olr;
1335 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001336 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001337 struct mr_table *mrt;
1338
1339 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1340 if (mrt == NULL)
1341 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342
Jianjun Kongc354e122008-11-03 00:28:02 -08001343 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344#ifdef CONFIG_IP_PIMSM
1345 optname!=MRT_PIM &&
1346#endif
1347 optname!=MRT_ASSERT)
1348 return -ENOPROTOOPT;
1349
1350 if (get_user(olr, optlen))
1351 return -EFAULT;
1352
1353 olr = min_t(unsigned int, olr, sizeof(int));
1354 if (olr < 0)
1355 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001356
Jianjun Kongc354e122008-11-03 00:28:02 -08001357 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001359 if (optname == MRT_VERSION)
1360 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001362 else if (optname == MRT_PIM)
Patrick McHardy0c122952010-04-13 05:03:22 +00001363 val = mrt->mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364#endif
1365 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001366 val = mrt->mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001367 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 return -EFAULT;
1369 return 0;
1370}
1371
1372/*
1373 * The IP multicast ioctl support routines.
1374 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001375
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1377{
1378 struct sioc_sg_req sr;
1379 struct sioc_vif_req vr;
1380 struct vif_device *vif;
1381 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001382 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001383 struct mr_table *mrt;
1384
1385 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1386 if (mrt == NULL)
1387 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001388
Stephen Hemminger132adf52007-03-08 20:44:43 -08001389 switch (cmd) {
1390 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001391 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001392 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001393 if (vr.vifi >= mrt->maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001394 return -EINVAL;
1395 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001396 vif = &mrt->vif_table[vr.vifi];
1397 if (VIF_EXISTS(mrt, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001398 vr.icount = vif->pkt_in;
1399 vr.ocount = vif->pkt_out;
1400 vr.ibytes = vif->bytes_in;
1401 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001403
Jianjun Kongc354e122008-11-03 00:28:02 -08001404 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001406 return 0;
1407 }
1408 read_unlock(&mrt_lock);
1409 return -EADDRNOTAVAIL;
1410 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001411 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001412 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
Stephen Hemminger132adf52007-03-08 20:44:43 -08001414 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001415 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001416 if (c) {
1417 sr.pktcnt = c->mfc_un.res.pkt;
1418 sr.bytecnt = c->mfc_un.res.bytes;
1419 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001421
Jianjun Kongc354e122008-11-03 00:28:02 -08001422 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001423 return -EFAULT;
1424 return 0;
1425 }
1426 read_unlock(&mrt_lock);
1427 return -EADDRNOTAVAIL;
1428 default:
1429 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 }
1431}
1432
1433
1434static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1435{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001436 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001437 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001438 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 struct vif_device *v;
1440 int ct;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001441 LIST_HEAD(list);
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001442
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 if (event != NETDEV_UNREGISTER)
1444 return NOTIFY_DONE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001445
1446 ipmr_for_each_table(mrt, net) {
1447 v = &mrt->vif_table[0];
1448 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1449 if (v->dev == dev)
1450 vif_delete(mrt, ct, 1, &list);
1451 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001453 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 return NOTIFY_DONE;
1455}
1456
1457
Jianjun Kongc354e122008-11-03 00:28:02 -08001458static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 .notifier_call = ipmr_device_event,
1460};
1461
1462/*
1463 * Encapsulate a packet by attaching a valid IPIP header to it.
1464 * This avoids tunnel drivers and other mess and gives us the speed so
1465 * important for multicast video.
1466 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001467
Al Viro114c7842006-09-27 18:39:29 -07001468static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001470 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001471 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001472
1473 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001474 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001475 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001476 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477
1478 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001479 iph->tos = old_iph->tos;
1480 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 iph->frag_off = 0;
1482 iph->daddr = daddr;
1483 iph->saddr = saddr;
1484 iph->protocol = IPPROTO_IPIP;
1485 iph->ihl = 5;
1486 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001487 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488 ip_send_check(iph);
1489
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1491 nf_reset(skb);
1492}
1493
1494static inline int ipmr_forward_finish(struct sk_buff *skb)
1495{
1496 struct ip_options * opt = &(IPCB(skb)->opt);
1497
Eric Dumazetadf30902009-06-02 05:19:30 +00001498 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
1500 if (unlikely(opt->optlen))
1501 ip_forward_options(skb);
1502
1503 return dst_output(skb);
1504}
1505
/*
 *	Processing handlers for ipmr_forward
 */
1509
Patrick McHardy0c122952010-04-13 05:03:22 +00001510static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1511 struct sk_buff *skb, struct mfc_cache *c, int vifi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001513 const struct iphdr *iph = ip_hdr(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001514 struct vif_device *vif = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515 struct net_device *dev;
1516 struct rtable *rt;
1517 int encap = 0;
1518
1519 if (vif->dev == NULL)
1520 goto out_free;
1521
1522#ifdef CONFIG_IP_PIMSM
1523 if (vif->flags & VIFF_REGISTER) {
1524 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001525 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001526 vif->dev->stats.tx_bytes += skb->len;
1527 vif->dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001528 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001529 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 }
1531#endif
1532
1533 if (vif->flags&VIFF_TUNNEL) {
1534 struct flowi fl = { .oif = vif->link,
1535 .nl_u = { .ip4_u =
1536 { .daddr = vif->remote,
1537 .saddr = vif->local,
1538 .tos = RT_TOS(iph->tos) } },
1539 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001540 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 goto out_free;
1542 encap = sizeof(struct iphdr);
1543 } else {
1544 struct flowi fl = { .oif = vif->link,
1545 .nl_u = { .ip4_u =
1546 { .daddr = iph->daddr,
1547 .tos = RT_TOS(iph->tos) } },
1548 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001549 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 goto out_free;
1551 }
1552
1553 dev = rt->u.dst.dev;
1554
1555 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1556 /* Do not fragment multicasts. Alas, IPv4 does not
1557 allow to send ICMP, so that packets will disappear
1558 to blackhole.
1559 */
1560
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001561 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 ip_rt_put(rt);
1563 goto out_free;
1564 }
1565
1566 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1567
1568 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001569 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 goto out_free;
1571 }
1572
1573 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001574 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575
Eric Dumazetadf30902009-06-02 05:19:30 +00001576 skb_dst_drop(skb);
1577 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001578 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579
1580 /* FIXME: forward and output firewalls used to be called here.
1581 * What do we do with netfilter? -- RR */
1582 if (vif->flags & VIFF_TUNNEL) {
1583 ip_encap(skb, vif->local, vif->remote);
1584 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001585 vif->dev->stats.tx_packets++;
1586 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 }
1588
1589 IPCB(skb)->flags |= IPSKB_FORWARDED;
1590
1591 /*
1592 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1593 * not only before forwarding, but after forwarding on all output
1594 * interfaces. It is clear, if mrouter runs a multicasting
1595 * program, it should receive packets not depending to what interface
1596 * program is joined.
1597 * If we will not make it, the program will have to join on all
1598 * interfaces. On the other hand, multihoming host (or router, but
1599 * not mrouter) cannot join to more than one interface - it will
1600 * result in receiving multiple packets.
1601 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001602 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 ipmr_forward_finish);
1604 return;
1605
1606out_free:
1607 kfree_skb(skb);
1608 return;
1609}
1610
Patrick McHardy0c122952010-04-13 05:03:22 +00001611static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612{
1613 int ct;
Patrick McHardy0c122952010-04-13 05:03:22 +00001614
1615 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616 if (mrt->vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 break;
1618 }
1619 return ct;
1620}
1621
1622/* "local" means that we should preserve one skb (for local delivery) */
1623
Patrick McHardy0c122952010-04-13 05:03:22 +00001624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627{
1628 int psend = -1;
1629 int vif, ct;
1630
1631 vif = cache->mfc_parent;
1632 cache->mfc_un.res.pkt++;
1633 cache->mfc_un.res.bytes += skb->len;
1634
1635 /*
1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1637 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001638 if (mrt->vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 int true_vifi;
1640
Eric Dumazet511c3f92009-06-02 05:14:27 +00001641 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 /* It is our own packet, looped back.
1643 Very complicated situation...
1644
1645 The best workaround until routing daemons will be
1646 fixed is not to redistribute packet, if it was
1647 send through wrong interface. It means, that
1648 multicast applications WILL NOT work for
1649 (S,G), which have default multicast route pointing
1650 to wrong oif. In any case, it is not a good
1651 idea to use multicasting applications on router.
1652 */
1653 goto dont_forward;
1654 }
1655
1656 cache->mfc_un.res.wrong_if++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001657 true_vifi = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Patrick McHardy0c122952010-04-13 05:03:22 +00001659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 /* pimsm uses asserts, when switching from RPT to SPT,
1661 so that we cannot check that packet arrived on an oif.
1662 It is bad, but otherwise we would need to move pretty
1663 large chunk of pimd to kernel. Ough... --ANK
1664 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001665 (mrt->mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001667 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1669 cache->mfc_un.res.last_assert = jiffies;
Patrick McHardy0c122952010-04-13 05:03:22 +00001670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 }
1672 goto dont_forward;
1673 }
1674
Patrick McHardy0c122952010-04-13 05:03:22 +00001675 mrt->vif_table[vif].pkt_in++;
1676 mrt->vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677
1678 /*
1679 * Forward the frame
1680 */
1681 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001682 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 if (psend != -1) {
1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001689 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690 }
1691 }
1692 if (psend != -1) {
1693 if (local) {
1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 return 0;
1700 }
1701 }
1702
1703dont_forward:
1704 if (!local)
1705 kfree_skb(skb);
1706 return 0;
1707}
1708
1709
1710/*
1711 * Multicast packets for forwarding arrive here
1712 */
1713
1714int ip_mr_input(struct sk_buff *skb)
1715{
1716 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001717 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001718 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001719 struct mr_table *mrt;
1720 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
1722 /* Packet is looped back after forward, it should not be
1723 forwarded second time, but still can be delivered locally.
1724 */
1725 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1726 goto dont_forward;
1727
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0)
1730 return err;
1731
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 if (!local) {
1733 if (IPCB(skb)->opt.router_alert) {
1734 if (ip_call_ra_chain(skb))
1735 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001736 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 /* IGMPv1 (and broken IGMPv2 implementations sort of
1738 Cisco IOS <= 11.2(8)) do not put router alert
1739 option to IGMP packets destined to routable
1740 groups. It is very bad, because it means
1741 that we can forward NO IGMP messages.
1742 */
1743 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001744 if (mrt->mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001745 nf_reset(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001746 raw_rcv(mrt->mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 read_unlock(&mrt_lock);
1748 return 0;
1749 }
1750 read_unlock(&mrt_lock);
1751 }
1752 }
1753
1754 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001755 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756
1757 /*
1758 * No usable cache entry
1759 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001760 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 int vif;
1762
1763 if (local) {
1764 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1765 ip_local_deliver(skb);
1766 if (skb2 == NULL) {
1767 read_unlock(&mrt_lock);
1768 return -ENOBUFS;
1769 }
1770 skb = skb2;
1771 }
1772
Patrick McHardy0c122952010-04-13 05:03:22 +00001773 vif = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 if (vif >= 0) {
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001775 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 read_unlock(&mrt_lock);
1777
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001778 return err2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 }
1780 read_unlock(&mrt_lock);
1781 kfree_skb(skb);
1782 return -ENODEV;
1783 }
1784
Patrick McHardy0c122952010-04-13 05:03:22 +00001785 ip_mr_forward(net, mrt, skb, cache, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786
1787 read_unlock(&mrt_lock);
1788
1789 if (local)
1790 return ip_local_deliver(skb);
1791
1792 return 0;
1793
1794dont_forward:
1795 if (local)
1796 return ip_local_deliver(skb);
1797 kfree_skb(skb);
1798 return 0;
1799}
1800
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001801#ifdef CONFIG_IP_PIMSM
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001802static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1803 unsigned int pimlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804{
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001805 struct net_device *reg_dev = NULL;
1806 struct iphdr *encap;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001808 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 /*
1810 Check that:
1811 a. packet is really destinted to a multicast group
1812 b. packet is not a NULL-REGISTER
1813 c. packet is not truncated
1814 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001815 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 encap->tot_len == 0 ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001817 ntohs(encap->tot_len) + pimlen > skb->len)
1818 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001821 if (mrt->mroute_reg_vif_num >= 0)
1822 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 if (reg_dev)
1824 dev_hold(reg_dev);
1825 read_unlock(&mrt_lock);
1826
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001827 if (reg_dev == NULL)
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001828 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001830 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001832 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 skb->protocol = htons(ETH_P_IP);
1835 skb->ip_summed = 0;
1836 skb->pkt_type = PACKET_HOST;
Eric Dumazetadf30902009-06-02 05:19:30 +00001837 skb_dst_drop(skb);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001838 reg_dev->stats.rx_bytes += skb->len;
1839 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 nf_reset(skb);
1841 netif_rx(skb);
1842 dev_put(reg_dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001843
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844 return 0;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001845}
1846#endif
1847
1848#ifdef CONFIG_IP_PIMSM_V1
1849/*
1850 * Handle IGMP messages of PIMv1
1851 */
1852
1853int pim_rcv_v1(struct sk_buff * skb)
1854{
1855 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001856 struct net *net = dev_net(skb->dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001857 struct mr_table *mrt;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001858
1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1860 goto drop;
1861
1862 pim = igmp_hdr(skb);
1863
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
Patrick McHardy0c122952010-04-13 05:03:22 +00001867 if (!mrt->mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1869 goto drop;
1870
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001872drop:
1873 kfree_skb(skb);
1874 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 return 0;
1876}
1877#endif
1878
1879#ifdef CONFIG_IP_PIMSM_V2
1880static int pim_rcv(struct sk_buff * skb)
1881{
1882 struct pimreghdr *pim;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 goto drop;
1888
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001889 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001890 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001892 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 goto drop;
1895
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001900drop:
1901 kfree_skb(skb);
1902 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 return 0;
1904}
1905#endif
1906
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001907static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1908 struct mfc_cache *c, struct rtmsg *rtm)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909{
1910 int ct;
1911 struct rtnexthop *nhp;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001912 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 struct rtattr *mp_head;
1914
Nicolas Dichtel74381892010-03-25 23:45:35 +00001915 /* If cache is unresolved, don't try to parse IIF and OIF */
1916 if (c->mfc_parent > MAXVIFS)
1917 return -ENOENT;
1918
Patrick McHardy0c122952010-04-13 05:03:22 +00001919 if (VIF_EXISTS(mrt, c->mfc_parent))
1920 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921
Jianjun Kongc354e122008-11-03 00:28:02 -08001922 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923
1924 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001925 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1927 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001928 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 nhp->rtnh_flags = 0;
1930 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Patrick McHardy0c122952010-04-13 05:03:22 +00001931 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001932 nhp->rtnh_len = sizeof(*nhp);
1933 }
1934 }
1935 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001936 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 rtm->rtm_type = RTN_MULTICAST;
1938 return 1;
1939
1940rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001941 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 return -EMSGSIZE;
1943}
1944
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001945int ipmr_get_route(struct net *net,
1946 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947{
1948 int err;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001949 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001951 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001953 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1954 if (mrt == NULL)
1955 return -ENOENT;
1956
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001958 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959
Jianjun Kongc354e122008-11-03 00:28:02 -08001960 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001961 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001962 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 struct net_device *dev;
1964 int vif;
1965
1966 if (nowait) {
1967 read_unlock(&mrt_lock);
1968 return -EAGAIN;
1969 }
1970
1971 dev = skb->dev;
Patrick McHardy0c122952010-04-13 05:03:22 +00001972 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 read_unlock(&mrt_lock);
1974 return -ENODEV;
1975 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001976 skb2 = skb_clone(skb, GFP_ATOMIC);
1977 if (!skb2) {
1978 read_unlock(&mrt_lock);
1979 return -ENOMEM;
1980 }
1981
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001982 skb_push(skb2, sizeof(struct iphdr));
1983 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001984 iph = ip_hdr(skb2);
1985 iph->ihl = sizeof(struct iphdr) >> 2;
1986 iph->saddr = rt->rt_src;
1987 iph->daddr = rt->rt_dst;
1988 iph->version = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001989 err = ipmr_cache_unresolved(mrt, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 read_unlock(&mrt_lock);
1991 return err;
1992 }
1993
1994 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1995 cache->mfc_flags |= MFC_NOTIFY;
Patrick McHardycb6a4e42010-04-26 16:02:08 +02001996 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 read_unlock(&mrt_lock);
1998 return err;
1999}
2000
Patrick McHardycb6a4e42010-04-26 16:02:08 +02002001static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2002 u32 pid, u32 seq, struct mfc_cache *c)
2003{
2004 struct nlmsghdr *nlh;
2005 struct rtmsg *rtm;
2006
2007 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2008 if (nlh == NULL)
2009 return -EMSGSIZE;
2010
2011 rtm = nlmsg_data(nlh);
2012 rtm->rtm_family = RTNL_FAMILY_IPMR;
2013 rtm->rtm_dst_len = 32;
2014 rtm->rtm_src_len = 32;
2015 rtm->rtm_tos = 0;
2016 rtm->rtm_table = mrt->id;
2017 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2018 rtm->rtm_type = RTN_MULTICAST;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = RTPROT_UNSPEC;
2021 rtm->rtm_flags = 0;
2022
2023 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2024 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2025
2026 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2027 goto nla_put_failure;
2028
2029 return nlmsg_end(skb, nlh);
2030
2031nla_put_failure:
2032 nlmsg_cancel(skb, nlh);
2033 return -EMSGSIZE;
2034}
2035
2036static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2037{
2038 struct net *net = sock_net(skb->sk);
2039 struct mr_table *mrt;
2040 struct mfc_cache *mfc;
2041 unsigned int t = 0, s_t;
2042 unsigned int h = 0, s_h;
2043 unsigned int e = 0, s_e;
2044
2045 s_t = cb->args[0];
2046 s_h = cb->args[1];
2047 s_e = cb->args[2];
2048
2049 read_lock(&mrt_lock);
2050 ipmr_for_each_table(mrt, net) {
2051 if (t < s_t)
2052 goto next_table;
2053 if (t > s_t)
2054 s_h = 0;
2055 for (h = s_h; h < MFC_LINES; h++) {
2056 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2057 if (e < s_e)
2058 goto next_entry;
2059 if (ipmr_fill_mroute(mrt, skb,
2060 NETLINK_CB(cb->skb).pid,
2061 cb->nlh->nlmsg_seq,
2062 mfc) < 0)
2063 goto done;
2064next_entry:
2065 e++;
2066 }
2067 e = s_e = 0;
2068 }
2069 s_h = 0;
2070next_table:
2071 t++;
2072 }
2073done:
2074 read_unlock(&mrt_lock);
2075
2076 cb->args[2] = e;
2077 cb->args[1] = h;
2078 cb->args[0] = t;
2079
2080 return skb->len;
2081}
2082
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002083#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
2087struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002088 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002089 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 int ct;
2091};
2092
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002093static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2094 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 loff_t pos)
2096{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002097 struct mr_table *mrt = iter->mrt;
Patrick McHardy0c122952010-04-13 05:03:22 +00002098
2099 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2100 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002101 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002102 if (pos-- == 0)
Patrick McHardy0c122952010-04-13 05:03:22 +00002103 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 }
2105 return NULL;
2106}
2107
2108static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002109 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002111 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002112 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002113 struct mr_table *mrt;
2114
2115 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2116 if (mrt == NULL)
2117 return ERR_PTR(-ENOENT);
2118
2119 iter->mrt = mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002120
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002122 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 : SEQ_START_TOKEN;
2124}
2125
2126static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127{
2128 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002129 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002130 struct mr_table *mrt = iter->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131
2132 ++*pos;
2133 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002134 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002135
Patrick McHardy0c122952010-04-13 05:03:22 +00002136 while (++iter->ct < mrt->maxvif) {
2137 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 continue;
Patrick McHardy0c122952010-04-13 05:03:22 +00002139 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140 }
2141 return NULL;
2142}
2143
2144static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002145 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146{
2147 read_unlock(&mrt_lock);
2148}
2149
2150static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2151{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002152 struct ipmr_vif_iter *iter = seq->private;
2153 struct mr_table *mrt = iter->mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002154
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002156 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2158 } else {
2159 const struct vif_device *vif = v;
2160 const char *name = vif->dev ? vif->dev->name : "none";
2161
2162 seq_printf(seq,
2163 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Patrick McHardy0c122952010-04-13 05:03:22 +00002164 vif - mrt->vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002165 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 vif->bytes_out, vif->pkt_out,
2167 vif->flags, vif->local, vif->remote);
2168 }
2169 return 0;
2170}
2171
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002172static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 .start = ipmr_vif_seq_start,
2174 .next = ipmr_vif_seq_next,
2175 .stop = ipmr_vif_seq_stop,
2176 .show = ipmr_vif_seq_show,
2177};
2178
2179static int ipmr_vif_open(struct inode *inode, struct file *file)
2180{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002181 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2182 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183}
2184
Arjan van de Ven9a321442007-02-12 00:55:35 -08002185static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186 .owner = THIS_MODULE,
2187 .open = ipmr_vif_open,
2188 .read = seq_read,
2189 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002190 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191};
2192
2193struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002194 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002195 struct mr_table *mrt;
Patrick McHardy862465f2010-04-13 05:03:21 +00002196 struct list_head *cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 int ct;
2198};
2199
2200
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002201static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2202 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002204 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 struct mfc_cache *mfc;
2206
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 read_lock(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002208 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002209 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002210 list_for_each_entry(mfc, it->cache, list)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002211 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 return mfc;
Patrick McHardy862465f2010-04-13 05:03:21 +00002213 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 read_unlock(&mrt_lock);
2215
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002217 it->cache = &mrt->mfc_unres_queue;
Patrick McHardy862465f2010-04-13 05:03:21 +00002218 list_for_each_entry(mfc, it->cache, list)
Patrick McHardye258beb2010-04-13 05:03:19 +00002219 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 return mfc;
2221 spin_unlock_bh(&mfc_unres_lock);
2222
2223 it->cache = NULL;
2224 return NULL;
2225}
2226
2227
2228static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2229{
2230 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002231 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002232 struct mr_table *mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002233
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002234 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2235 if (mrt == NULL)
2236 return ERR_PTR(-ENOENT);
2237
2238 it->mrt = mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 it->cache = NULL;
2240 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002241 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 : SEQ_START_TOKEN;
2243}
2244
2245static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2246{
2247 struct mfc_cache *mfc = v;
2248 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002249 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002250 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251
2252 ++*pos;
2253
2254 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002255 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256
Patrick McHardy862465f2010-04-13 05:03:21 +00002257 if (mfc->list.next != it->cache)
2258 return list_entry(mfc->list.next, struct mfc_cache, list);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002259
Patrick McHardy0c122952010-04-13 05:03:22 +00002260 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 goto end_of_list;
2262
Patrick McHardy0c122952010-04-13 05:03:22 +00002263 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264
2265 while (++it->ct < MFC_LINES) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002266 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002267 if (list_empty(it->cache))
2268 continue;
2269 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 }
2271
2272 /* exhausted cache_array, show unresolved */
2273 read_unlock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002274 it->cache = &mrt->mfc_unres_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002276
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002278 if (!list_empty(it->cache))
2279 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
2281 end_of_list:
2282 spin_unlock_bh(&mfc_unres_lock);
2283 it->cache = NULL;
2284
2285 return NULL;
2286}
2287
2288static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2289{
2290 struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002291 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292
Patrick McHardy0c122952010-04-13 05:03:22 +00002293 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 spin_unlock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002295 else if (it->cache == &mrt->mfc_cache_array[it->ct])
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 read_unlock(&mrt_lock);
2297}
2298
2299static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2300{
2301 int n;
2302
2303 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002304 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2306 } else {
2307 const struct mfc_cache *mfc = v;
2308 const struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002309 const struct mr_table *mrt = it->mrt;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002310
Eric Dumazet0eae88f2010-04-20 19:06:52 -07002311 seq_printf(seq, "%08X %08X %-3hd",
2312 (__force u32) mfc->mfc_mcastgrp,
2313 (__force u32) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002314 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315
Patrick McHardy0c122952010-04-13 05:03:22 +00002316 if (it->cache != &mrt->mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002317 seq_printf(seq, " %8lu %8lu %8lu",
2318 mfc->mfc_un.res.pkt,
2319 mfc->mfc_un.res.bytes,
2320 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08002321 for (n = mfc->mfc_un.res.minvif;
2322 n < mfc->mfc_un.res.maxvif; n++ ) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002323 if (VIF_EXISTS(mrt, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00002324 mfc->mfc_un.res.ttls[n] < 255)
2325 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002326 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327 n, mfc->mfc_un.res.ttls[n]);
2328 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002329 } else {
2330 /* unresolved mfc_caches don't contain
2331 * pkt, bytes and wrong_if values
2332 */
2333 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 }
2335 seq_putc(seq, '\n');
2336 }
2337 return 0;
2338}
2339
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002340static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 .start = ipmr_mfc_seq_start,
2342 .next = ipmr_mfc_seq_next,
2343 .stop = ipmr_mfc_seq_stop,
2344 .show = ipmr_mfc_seq_show,
2345};
2346
2347static int ipmr_mfc_open(struct inode *inode, struct file *file)
2348{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002349 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2350 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351}
2352
Arjan van de Ven9a321442007-02-12 00:55:35 -08002353static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354 .owner = THIS_MODULE,
2355 .open = ipmr_mfc_open,
2356 .read = seq_read,
2357 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002358 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002360#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361
2362#ifdef CONFIG_IP_PIMSM_V2
Alexey Dobriyan32613092009-09-14 12:21:47 +00002363static const struct net_protocol pim_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 .handler = pim_rcv,
Tom Goff403dbb92009-06-14 03:16:13 -07002365 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366};
2367#endif
2368
2369
2370/*
2371 * Setup for IP multicast routing
2372 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00002373static int __net_init ipmr_net_init(struct net *net)
2374{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002375 int err;
Benjamin Therycf958ae32009-01-22 04:56:16 +00002376
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002377 err = ipmr_rules_init(net);
2378 if (err < 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00002379 goto fail;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002380
2381#ifdef CONFIG_PROC_FS
2382 err = -ENOMEM;
2383 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2384 goto proc_vif_fail;
2385 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2386 goto proc_cache_fail;
2387#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002388 return 0;
2389
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002390#ifdef CONFIG_PROC_FS
2391proc_cache_fail:
2392 proc_net_remove(net, "ip_mr_vif");
2393proc_vif_fail:
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002394 ipmr_rules_exit(net);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002395#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +00002396fail:
2397 return err;
2398}
2399
2400static void __net_exit ipmr_net_exit(struct net *net)
2401{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002402#ifdef CONFIG_PROC_FS
2403 proc_net_remove(net, "ip_mr_cache");
2404 proc_net_remove(net, "ip_mr_vif");
2405#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002406 ipmr_rules_exit(net);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002407}
2408
2409static struct pernet_operations ipmr_net_ops = {
2410 .init = ipmr_net_init,
2411 .exit = ipmr_net_exit,
2412};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002413
Wang Chen03d2f892008-07-03 12:13:36 +08002414int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415{
Wang Chen03d2f892008-07-03 12:13:36 +08002416 int err;
2417
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2419 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002420 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002421 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002422 if (!mrt_cachep)
2423 return -ENOMEM;
2424
Benjamin Therycf958ae32009-01-22 04:56:16 +00002425 err = register_pernet_subsys(&ipmr_net_ops);
2426 if (err)
2427 goto reg_pernet_fail;
2428
Wang Chen03d2f892008-07-03 12:13:36 +08002429 err = register_netdevice_notifier(&ip_mr_notifier);
2430 if (err)
2431 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002432#ifdef CONFIG_IP_PIMSM_V2
2433 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2434 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2435 err = -EAGAIN;
2436 goto add_proto_fail;
2437 }
2438#endif
Patrick McHardycb6a4e42010-04-26 16:02:08 +02002439 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
Wang Chen03d2f892008-07-03 12:13:36 +08002440 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002441
Tom Goff403dbb92009-06-14 03:16:13 -07002442#ifdef CONFIG_IP_PIMSM_V2
2443add_proto_fail:
2444 unregister_netdevice_notifier(&ip_mr_notifier);
2445#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002446reg_notif_fail:
Benjamin Therycf958ae32009-01-22 04:56:16 +00002447 unregister_pernet_subsys(&ipmr_net_ops);
2448reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002449 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002450 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451}