blob: 0643fb6d47c4a19093f245c924402a8773fc8f1e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
Alan Cox113aa832008-10-13 19:01:08 -07004 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <linux/types.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080032#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/errno.h>
34#include <linux/timer.h>
35#include <linux/mm.h>
36#include <linux/kernel.h>
37#include <linux/fcntl.h>
38#include <linux/stat.h>
39#include <linux/socket.h>
40#include <linux/in.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/inetdevice.h>
44#include <linux/igmp.h>
45#include <linux/proc_fs.h>
46#include <linux/seq_file.h>
47#include <linux/mroute.h>
48#include <linux/init.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080049#include <linux/if_ether.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090050#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020051#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ip.h>
53#include <net/protocol.h>
54#include <linux/skbuff.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020055#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <net/sock.h>
57#include <net/icmp.h>
58#include <net/udp.h>
59#include <net/raw.h>
60#include <linux/notifier.h>
61#include <linux/if_arp.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ipip.h>
64#include <net/checksum.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070065#include <net/netlink.h>
Patrick McHardyf0ad0862010-04-13 05:03:23 +000066#include <net/fib_rules.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
/*
 * Defined when either PIM-SM protocol version is configured; the PIM
 * register-vif code in this file is guarded by this single symbol.
 */
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
71
Patrick McHardy0c122952010-04-13 05:03:22 +000072struct mr_table {
Patrick McHardyf0ad0862010-04-13 05:03:23 +000073 struct list_head list;
74 u32 id;
Patrick McHardy0c122952010-04-13 05:03:22 +000075 struct sock *mroute_sk;
76 struct timer_list ipmr_expire_timer;
77 struct list_head mfc_unres_queue;
78 struct list_head mfc_cache_array[MFC_LINES];
79 struct vif_device vif_table[MAXVIFS];
80 int maxvif;
81 atomic_t cache_resolve_queue_len;
82 int mroute_do_assert;
83 int mroute_do_pim;
84#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
85 int mroute_reg_vif_num;
86#endif
87};
88
Patrick McHardyf0ad0862010-04-13 05:03:23 +000089struct ipmr_rule {
90 struct fib_rule common;
91};
92
/* Result of a rule lookup: the table selected by ipmr_rule_action(). */
struct ipmr_result {
	struct mr_table		*mrt;
};
96
Linus Torvalds1da177e2005-04-16 15:20:36 -070097/* Big lock, protecting vif table, mrt cache and mroute socket state.
98 Note that the changes are semaphored via rtnl_lock.
99 */
100
101static DEFINE_RWLOCK(mrt_lock);
102
103/*
104 * Multicast router control variables
105 */
106
Patrick McHardy0c122952010-04-13 05:03:22 +0000107#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109/* Special spinlock for queue of unresolved entries */
110static DEFINE_SPINLOCK(mfc_unres_lock);
111
112/* We return to original Alan's scheme. Hash table of resolved
113 entries is changed only in process context and protected
114 with weak lock mrt_lock. Queue of unresolved entries is protected
115 with strong spinlock mfc_unres_lock.
116
117 In this case data path is free of exclusive locks at all.
118 */
119
Christoph Lametere18b8902006-12-06 20:33:20 -0800120static struct kmem_cache *mrt_cachep __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000122static struct mr_table *ipmr_new_table(struct net *net, u32 id);
Patrick McHardy0c122952010-04-13 05:03:22 +0000123static int ip_mr_forward(struct net *net, struct mr_table *mrt,
124 struct sk_buff *skb, struct mfc_cache *cache,
125 int local);
126static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000127 struct sk_buff *pkt, vifi_t vifi, int assert);
Patrick McHardy0c122952010-04-13 05:03:22 +0000128static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000129 struct mfc_cache *c, struct rtmsg *rtm);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000130static void ipmr_expire_process(unsigned long arg);
131
132#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
/* Walk every multicast routing table linked on net->ipv4.mr_tables. */
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &(net)->ipv4.mr_tables, list)
135
136static struct mr_table *ipmr_get_table(struct net *net, u32 id)
137{
138 struct mr_table *mrt;
139
140 ipmr_for_each_table(mrt, net) {
141 if (mrt->id == id)
142 return mrt;
143 }
144 return NULL;
145}
146
147static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
148 struct mr_table **mrt)
149{
150 struct ipmr_result res;
151 struct fib_lookup_arg arg = { .result = &res, };
152 int err;
153
154 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
155 if (err < 0)
156 return err;
157 *mrt = res.mrt;
158 return 0;
159}
160
161static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
162 int flags, struct fib_lookup_arg *arg)
163{
164 struct ipmr_result *res = arg->result;
165 struct mr_table *mrt;
166
167 switch (rule->action) {
168 case FR_ACT_TO_TBL:
169 break;
170 case FR_ACT_UNREACHABLE:
171 return -ENETUNREACH;
172 case FR_ACT_PROHIBIT:
173 return -EACCES;
174 case FR_ACT_BLACKHOLE:
175 default:
176 return -EINVAL;
177 }
178
179 mrt = ipmr_get_table(rule->fr_net, rule->table);
180 if (mrt == NULL)
181 return -EAGAIN;
182 res->mrt = mrt;
183 return 0;
184}
185
/* fib_rules "match" hook: ipmr rules have no extra selectors, so always match. */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}
190
191static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
192 FRA_GENERIC_POLICY,
193};
194
/* fib_rules "configure" hook: nothing ipmr-specific to parse, report success. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
200
/* fib_rules "compare" hook: no ipmr-specific fields to compare, always returns 1. */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}
206
207static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208 struct fib_rule_hdr *frh)
209{
210 frh->dst_len = 0;
211 frh->src_len = 0;
212 frh->tos = 0;
213 return 0;
214}
215
216static struct fib_rules_ops ipmr_rules_ops_template = {
217 .family = FIB_RULES_IPMR,
218 .rule_size = sizeof(struct ipmr_rule),
219 .addr_size = sizeof(u32),
220 .action = ipmr_rule_action,
221 .match = ipmr_rule_match,
222 .configure = ipmr_rule_configure,
223 .compare = ipmr_rule_compare,
224 .default_pref = fib_default_rule_pref,
225 .fill = ipmr_rule_fill,
226 .nlgroup = RTNLGRP_IPV4_RULE,
227 .policy = ipmr_rule_policy,
228 .owner = THIS_MODULE,
229};
230
231static int __net_init ipmr_rules_init(struct net *net)
232{
233 struct fib_rules_ops *ops;
234 struct mr_table *mrt;
235 int err;
236
237 ops = fib_rules_register(&ipmr_rules_ops_template, net);
238 if (IS_ERR(ops))
239 return PTR_ERR(ops);
240
241 INIT_LIST_HEAD(&net->ipv4.mr_tables);
242
243 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
244 if (mrt == NULL) {
245 err = -ENOMEM;
246 goto err1;
247 }
248
249 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
250 if (err < 0)
251 goto err2;
252
253 net->ipv4.mr_rules_ops = ops;
254 return 0;
255
256err2:
257 kfree(mrt);
258err1:
259 fib_rules_unregister(ops);
260 return err;
261}
262
263static void __net_exit ipmr_rules_exit(struct net *net)
264{
265 struct mr_table *mrt, *next;
266
267 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
268 kfree(mrt);
269 fib_rules_unregister(net->ipv4.mr_rules_ops);
270}
271#else
/* Single-table build: "iterate" over the one table in net->ipv4.mrt, if any. */
#define ipmr_for_each_table(mrt, net) \
	for (mrt = (net)->ipv4.mrt; mrt; mrt = NULL)
274
275static struct mr_table *ipmr_get_table(struct net *net, u32 id)
276{
277 return net->ipv4.mrt;
278}
279
280static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
281 struct mr_table **mrt)
282{
283 *mrt = net->ipv4.mrt;
284 return 0;
285}
286
287static int __net_init ipmr_rules_init(struct net *net)
288{
289 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
290 return net->ipv4.mrt ? 0 : -ENOMEM;
291}
292
293static void __net_exit ipmr_rules_exit(struct net *net)
294{
295 kfree(net->ipv4.mrt);
296}
297#endif
298
299static struct mr_table *ipmr_new_table(struct net *net, u32 id)
300{
301 struct mr_table *mrt;
302 unsigned int i;
303
304 mrt = ipmr_get_table(net, id);
305 if (mrt != NULL)
306 return mrt;
307
308 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
309 if (mrt == NULL)
310 return NULL;
311 mrt->id = id;
312
313 /* Forwarding cache */
314 for (i = 0; i < MFC_LINES; i++)
315 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
316
317 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
318
319 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
320 (unsigned long)mrt);
321
322#ifdef CONFIG_IP_PIMSM
323 mrt->mroute_reg_vif_num = -1;
324#endif
325#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
326 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
327#endif
328 return mrt;
329}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
332
Wang Chend6070322008-07-14 20:55:26 -0700333static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
334{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000335 struct net *net = dev_net(dev);
336
Wang Chend6070322008-07-14 20:55:26 -0700337 dev_close(dev);
338
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000339 dev = __dev_get_by_name(net, "tunl0");
Wang Chend6070322008-07-14 20:55:26 -0700340 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800341 const struct net_device_ops *ops = dev->netdev_ops;
Wang Chend6070322008-07-14 20:55:26 -0700342 struct ifreq ifr;
Wang Chend6070322008-07-14 20:55:26 -0700343 struct ip_tunnel_parm p;
344
345 memset(&p, 0, sizeof(p));
346 p.iph.daddr = v->vifc_rmt_addr.s_addr;
347 p.iph.saddr = v->vifc_lcl_addr.s_addr;
348 p.iph.version = 4;
349 p.iph.ihl = 5;
350 p.iph.protocol = IPPROTO_IPIP;
351 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
352 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
353
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800354 if (ops->ndo_do_ioctl) {
355 mm_segment_t oldfs = get_fs();
356
357 set_fs(KERNEL_DS);
358 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
359 set_fs(oldfs);
360 }
Wang Chend6070322008-07-14 20:55:26 -0700361 }
362}
363
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364static
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000365struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366{
367 struct net_device *dev;
368
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000369 dev = __dev_get_by_name(net, "tunl0");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
371 if (dev) {
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800372 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 int err;
374 struct ifreq ifr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 struct ip_tunnel_parm p;
376 struct in_device *in_dev;
377
378 memset(&p, 0, sizeof(p));
379 p.iph.daddr = v->vifc_rmt_addr.s_addr;
380 p.iph.saddr = v->vifc_lcl_addr.s_addr;
381 p.iph.version = 4;
382 p.iph.ihl = 5;
383 p.iph.protocol = IPPROTO_IPIP;
384 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
Stephen Hemmingerba93ef72008-01-21 17:28:59 -0800385 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
Stephen Hemminger5bc3eb72008-11-19 21:52:05 -0800387 if (ops->ndo_do_ioctl) {
388 mm_segment_t oldfs = get_fs();
389
390 set_fs(KERNEL_DS);
391 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
392 set_fs(oldfs);
393 } else
394 err = -EOPNOTSUPP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
396 dev = NULL;
397
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000398 if (err == 0 &&
399 (dev = __dev_get_by_name(net, p.name)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 dev->flags |= IFF_MULTICAST;
401
Herbert Xue5ed6392005-10-03 14:35:55 -0700402 in_dev = __in_dev_get_rtnl(dev);
Herbert Xu71e27da2007-06-04 23:36:06 -0700403 if (in_dev == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700405
406 ipv4_devconf_setall(in_dev);
407 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
409 if (dev_open(dev))
410 goto failure;
Wang Chen7dc00c82008-07-14 20:56:34 -0700411 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
413 }
414 return dev;
415
416failure:
417 /* allow the register to be completed before unregistering. */
418 rtnl_unlock();
419 rtnl_lock();
420
421 unregister_netdevice(dev);
422 return NULL;
423}
424
425#ifdef CONFIG_IP_PIMSM
426
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000427static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000429 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000430 struct mr_table *mrt;
431 struct flowi fl = {
432 .oif = dev->ifindex,
433 .iif = skb->skb_iif,
434 .mark = skb->mark,
435 };
436 int err;
437
438 err = ipmr_fib_lookup(net, &fl, &mrt);
439 if (err < 0)
440 return err;
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000441
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 read_lock(&mrt_lock);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -0700443 dev->stats.tx_bytes += skb->len;
444 dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +0000445 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 read_unlock(&mrt_lock);
447 kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000448 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449}
450
Stephen Hemminger007c3832008-11-20 20:28:35 -0800451static const struct net_device_ops reg_vif_netdev_ops = {
452 .ndo_start_xmit = reg_vif_xmit,
453};
454
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455static void reg_vif_setup(struct net_device *dev)
456{
457 dev->type = ARPHRD_PIMREG;
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800458 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800460 dev->netdev_ops = &reg_vif_netdev_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 dev->destructor = free_netdev;
Tom Goff403dbb92009-06-14 03:16:13 -0700462 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463}
464
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000465static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466{
467 struct net_device *dev;
468 struct in_device *in_dev;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000469 char name[IFNAMSIZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000471 if (mrt->id == RT_TABLE_DEFAULT)
472 sprintf(name, "pimreg");
473 else
474 sprintf(name, "pimreg%u", mrt->id);
475
476 dev = alloc_netdev(0, name, reg_vif_setup);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 if (dev == NULL)
479 return NULL;
480
Tom Goff403dbb92009-06-14 03:16:13 -0700481 dev_net_set(dev, net);
482
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 if (register_netdevice(dev)) {
484 free_netdev(dev);
485 return NULL;
486 }
487 dev->iflink = 0;
488
Herbert Xu71e27da2007-06-04 23:36:06 -0700489 rcu_read_lock();
490 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
491 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 goto failure;
Herbert Xu71e27da2007-06-04 23:36:06 -0700493 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
Herbert Xu71e27da2007-06-04 23:36:06 -0700495 ipv4_devconf_setall(in_dev);
496 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
497 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
499 if (dev_open(dev))
500 goto failure;
501
Wang Chen7dc00c82008-07-14 20:56:34 -0700502 dev_hold(dev);
503
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 return dev;
505
506failure:
507 /* allow the register to be completed before unregistering. */
508 rtnl_unlock();
509 rtnl_lock();
510
511 unregister_netdevice(dev);
512 return NULL;
513}
514#endif
515
516/*
517 * Delete a VIF entry
Wang Chen7dc00c82008-07-14 20:56:34 -0700518 * @notify: Set to 1, if the caller is a notifier_call
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900520
Patrick McHardy0c122952010-04-13 05:03:22 +0000521static int vif_delete(struct mr_table *mrt, int vifi, int notify,
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000522 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 struct vif_device *v;
525 struct net_device *dev;
526 struct in_device *in_dev;
527
Patrick McHardy0c122952010-04-13 05:03:22 +0000528 if (vifi < 0 || vifi >= mrt->maxvif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 return -EADDRNOTAVAIL;
530
Patrick McHardy0c122952010-04-13 05:03:22 +0000531 v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533 write_lock_bh(&mrt_lock);
534 dev = v->dev;
535 v->dev = NULL;
536
537 if (!dev) {
538 write_unlock_bh(&mrt_lock);
539 return -EADDRNOTAVAIL;
540 }
541
542#ifdef CONFIG_IP_PIMSM
Patrick McHardy0c122952010-04-13 05:03:22 +0000543 if (vifi == mrt->mroute_reg_vif_num)
544 mrt->mroute_reg_vif_num = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545#endif
546
Patrick McHardy0c122952010-04-13 05:03:22 +0000547 if (vifi+1 == mrt->maxvif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 int tmp;
549 for (tmp=vifi-1; tmp>=0; tmp--) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000550 if (VIF_EXISTS(mrt, tmp))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 break;
552 }
Patrick McHardy0c122952010-04-13 05:03:22 +0000553 mrt->maxvif = tmp+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 }
555
556 write_unlock_bh(&mrt_lock);
557
558 dev_set_allmulti(dev, -1);
559
Herbert Xue5ed6392005-10-03 14:35:55 -0700560 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
Herbert Xu42f811b2007-06-04 23:34:44 -0700561 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 ip_rt_multicast_event(in_dev);
563 }
564
Wang Chen7dc00c82008-07-14 20:56:34 -0700565 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
Eric Dumazetd17fa6f2009-10-28 05:21:38 +0000566 unregister_netdevice_queue(dev, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567
568 dev_put(dev);
569 return 0;
570}
571
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000572static inline void ipmr_cache_free(struct mfc_cache *c)
573{
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000574 kmem_cache_free(mrt_cachep, c);
575}
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577/* Destroy an unresolved cache entry, killing queued skbs
578 and reporting error to netlink readers.
579 */
580
Patrick McHardy0c122952010-04-13 05:03:22 +0000581static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582{
Patrick McHardy0c122952010-04-13 05:03:22 +0000583 struct net *net = NULL; //mrt->net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700585 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
Patrick McHardy0c122952010-04-13 05:03:22 +0000587 atomic_dec(&mrt->cache_resolve_queue_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
Jianjun Kongc354e122008-11-03 00:28:02 -0800589 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700590 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
592 nlh->nlmsg_type = NLMSG_ERROR;
593 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
594 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700595 e = NLMSG_DATA(nlh);
596 e->error = -ETIMEDOUT;
597 memset(&e->msg, 0, sizeof(e->msg));
Thomas Graf2942e902006-08-15 00:30:25 -0700598
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000599 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 } else
601 kfree_skb(skb);
602 }
603
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000604 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605}
606
607
Patrick McHardye258beb2010-04-13 05:03:19 +0000608/* Timer process for the unresolved queue. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
Patrick McHardye258beb2010-04-13 05:03:19 +0000610static void ipmr_expire_process(unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611{
Patrick McHardy0c122952010-04-13 05:03:22 +0000612 struct mr_table *mrt = (struct mr_table *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 unsigned long now;
614 unsigned long expires;
Patrick McHardy862465f2010-04-13 05:03:21 +0000615 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
617 if (!spin_trylock(&mfc_unres_lock)) {
Patrick McHardy0c122952010-04-13 05:03:22 +0000618 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 return;
620 }
621
Patrick McHardy0c122952010-04-13 05:03:22 +0000622 if (list_empty(&mrt->mfc_unres_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 goto out;
624
625 now = jiffies;
626 expires = 10*HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
Patrick McHardy0c122952010-04-13 05:03:22 +0000628 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 if (time_after(c->mfc_un.unres.expires, now)) {
630 unsigned long interval = c->mfc_un.unres.expires - now;
631 if (interval < expires)
632 expires = interval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 continue;
634 }
635
Patrick McHardy862465f2010-04-13 05:03:21 +0000636 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +0000637 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 }
639
Patrick McHardy0c122952010-04-13 05:03:22 +0000640 if (!list_empty(&mrt->mfc_unres_queue))
641 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
643out:
644 spin_unlock(&mfc_unres_lock);
645}
646
647/* Fill oifs list. It is called under write locked mrt_lock. */
648
Patrick McHardy0c122952010-04-13 05:03:22 +0000649static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000650 unsigned char *ttls)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651{
652 int vifi;
653
654 cache->mfc_un.res.minvif = MAXVIFS;
655 cache->mfc_un.res.maxvif = 0;
656 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
657
Patrick McHardy0c122952010-04-13 05:03:22 +0000658 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
659 if (VIF_EXISTS(mrt, vifi) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +0000660 ttls[vifi] && ttls[vifi] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
662 if (cache->mfc_un.res.minvif > vifi)
663 cache->mfc_un.res.minvif = vifi;
664 if (cache->mfc_un.res.maxvif <= vifi)
665 cache->mfc_un.res.maxvif = vifi + 1;
666 }
667 }
668}
669
Patrick McHardy0c122952010-04-13 05:03:22 +0000670static int vif_add(struct net *net, struct mr_table *mrt,
671 struct vifctl *vifc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672{
673 int vifi = vifc->vifc_vifi;
Patrick McHardy0c122952010-04-13 05:03:22 +0000674 struct vif_device *v = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 struct net_device *dev;
676 struct in_device *in_dev;
Wang Chend6070322008-07-14 20:55:26 -0700677 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
679 /* Is vif busy ? */
Patrick McHardy0c122952010-04-13 05:03:22 +0000680 if (VIF_EXISTS(mrt, vifi))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 return -EADDRINUSE;
682
683 switch (vifc->vifc_flags) {
684#ifdef CONFIG_IP_PIMSM
685 case VIFF_REGISTER:
686 /*
687 * Special Purpose VIF in PIM
688 * All the packets will be sent to the daemon
689 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000690 if (mrt->mroute_reg_vif_num >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 return -EADDRINUSE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +0000692 dev = ipmr_reg_vif(net, mrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 if (!dev)
694 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700695 err = dev_set_allmulti(dev, 1);
696 if (err) {
697 unregister_netdevice(dev);
Wang Chen7dc00c82008-07-14 20:56:34 -0700698 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700699 return err;
700 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 break;
702#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900703 case VIFF_TUNNEL:
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000704 dev = ipmr_new_tunnel(net, vifc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 if (!dev)
706 return -ENOBUFS;
Wang Chend6070322008-07-14 20:55:26 -0700707 err = dev_set_allmulti(dev, 1);
708 if (err) {
709 ipmr_del_tunnel(dev, vifc);
Wang Chen7dc00c82008-07-14 20:56:34 -0700710 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700711 return err;
712 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 break;
Ilia Kee5e81f2009-09-16 05:53:07 +0000714
715 case VIFF_USE_IFINDEX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 case 0:
Ilia Kee5e81f2009-09-16 05:53:07 +0000717 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
718 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
719 if (dev && dev->ip_ptr == NULL) {
720 dev_put(dev);
721 return -EADDRNOTAVAIL;
722 }
723 } else
724 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
725
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 if (!dev)
727 return -EADDRNOTAVAIL;
Wang Chend6070322008-07-14 20:55:26 -0700728 err = dev_set_allmulti(dev, 1);
Wang Chen7dc00c82008-07-14 20:56:34 -0700729 if (err) {
730 dev_put(dev);
Wang Chend6070322008-07-14 20:55:26 -0700731 return err;
Wang Chen7dc00c82008-07-14 20:56:34 -0700732 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 break;
734 default:
735 return -EINVAL;
736 }
737
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000738 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
739 dev_put(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 return -EADDRNOTAVAIL;
Dan Carpenterd0490cf2009-11-11 02:03:54 +0000741 }
Herbert Xu42f811b2007-06-04 23:34:44 -0700742 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 ip_rt_multicast_event(in_dev);
744
745 /*
746 * Fill in the VIF structures
747 */
Jianjun Kongc354e122008-11-03 00:28:02 -0800748 v->rate_limit = vifc->vifc_rate_limit;
749 v->local = vifc->vifc_lcl_addr.s_addr;
750 v->remote = vifc->vifc_rmt_addr.s_addr;
751 v->flags = vifc->vifc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 if (!mrtsock)
753 v->flags |= VIFF_STATIC;
Jianjun Kongc354e122008-11-03 00:28:02 -0800754 v->threshold = vifc->vifc_threshold;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 v->bytes_in = 0;
756 v->bytes_out = 0;
757 v->pkt_in = 0;
758 v->pkt_out = 0;
759 v->link = dev->ifindex;
760 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
761 v->link = dev->iflink;
762
763 /* And finish update writing critical data */
764 write_lock_bh(&mrt_lock);
Jianjun Kongc354e122008-11-03 00:28:02 -0800765 v->dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766#ifdef CONFIG_IP_PIMSM
767 if (v->flags&VIFF_REGISTER)
Patrick McHardy0c122952010-04-13 05:03:22 +0000768 mrt->mroute_reg_vif_num = vifi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769#endif
Patrick McHardy0c122952010-04-13 05:03:22 +0000770 if (vifi+1 > mrt->maxvif)
771 mrt->maxvif = vifi+1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 write_unlock_bh(&mrt_lock);
773 return 0;
774}
775
Patrick McHardy0c122952010-04-13 05:03:22 +0000776static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000777 __be32 origin,
778 __be32 mcastgrp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779{
Jianjun Kongc354e122008-11-03 00:28:02 -0800780 int line = MFC_HASH(mcastgrp, origin);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 struct mfc_cache *c;
782
Patrick McHardy0c122952010-04-13 05:03:22 +0000783 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +0000784 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
785 return c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 }
Patrick McHardy862465f2010-04-13 05:03:21 +0000787 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788}
789
790/*
791 * Allocate a multicast cache entry
792 */
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000793static struct mfc_cache *ipmr_cache_alloc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794{
Jianjun Kongc354e122008-11-03 00:28:02 -0800795 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
796 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 c->mfc_un.res.minvif = MAXVIFS;
799 return c;
800}
801
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000802static struct mfc_cache *ipmr_cache_alloc_unres(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803{
Jianjun Kongc354e122008-11-03 00:28:02 -0800804 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
805 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 skb_queue_head_init(&c->mfc_un.unres.unresolved);
808 c->mfc_un.unres.expires = jiffies + 10*HZ;
809 return c;
810}
811
812/*
813 * A cache entry has gone into a resolved state from queued
814 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900815
Patrick McHardy0c122952010-04-13 05:03:22 +0000816static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
817 struct mfc_cache *uc, struct mfc_cache *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818{
819 struct sk_buff *skb;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700820 struct nlmsgerr *e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
822 /*
823 * Play the pending entries through our router
824 */
825
Jianjun Kongc354e122008-11-03 00:28:02 -0800826 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700827 if (ip_hdr(skb)->version == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
829
Patrick McHardy0c122952010-04-13 05:03:22 +0000830 if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700831 nlh->nlmsg_len = (skb_tail_pointer(skb) -
832 (u8 *)nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 } else {
834 nlh->nlmsg_type = NLMSG_ERROR;
835 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
836 skb_trim(skb, nlh->nlmsg_len);
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -0700837 e = NLMSG_DATA(nlh);
838 e->error = -EMSGSIZE;
839 memset(&e->msg, 0, sizeof(e->msg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 }
Thomas Graf2942e902006-08-15 00:30:25 -0700841
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000842 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 } else
Patrick McHardy0c122952010-04-13 05:03:22 +0000844 ip_mr_forward(net, mrt, skb, c, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 }
846}
847
848/*
849 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
850 * expects the following bizarre scheme.
851 *
852 * Called under mrt_lock.
853 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900854
Patrick McHardy0c122952010-04-13 05:03:22 +0000855static int ipmr_cache_report(struct mr_table *mrt,
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000856 struct sk_buff *pkt, vifi_t vifi, int assert)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857{
858 struct sk_buff *skb;
Arnaldo Carvalho de Meloc9bdd4b2007-03-12 20:09:15 -0300859 const int ihl = ip_hdrlen(pkt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 struct igmphdr *igmp;
861 struct igmpmsg *msg;
862 int ret;
863
864#ifdef CONFIG_IP_PIMSM
865 if (assert == IGMPMSG_WHOLEPKT)
866 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
867 else
868#endif
869 skb = alloc_skb(128, GFP_ATOMIC);
870
Stephen Hemminger132adf52007-03-08 20:44:43 -0800871 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 return -ENOBUFS;
873
874#ifdef CONFIG_IP_PIMSM
875 if (assert == IGMPMSG_WHOLEPKT) {
876 /* Ugly, but we have no choice with this interface.
877 Duplicate old header, fix ihl, length etc.
878 And all this only to mangle msg->im_msgtype and
879 to set msg->im_mbz to "mbz" :-)
880 */
Arnaldo Carvalho de Melo878c8142007-03-11 22:38:29 -0300881 skb_push(skb, sizeof(struct iphdr));
882 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -0300883 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melo0272ffc2007-03-12 20:05:39 -0300884 msg = (struct igmpmsg *)skb_network_header(skb);
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -0700885 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 msg->im_msgtype = IGMPMSG_WHOLEPKT;
887 msg->im_mbz = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +0000888 msg->im_vif = mrt->mroute_reg_vif_num;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700889 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
890 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
891 sizeof(struct iphdr));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900892 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900894 {
895
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 /*
897 * Copy the IP header
898 */
899
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700900 skb->network_header = skb->tail;
Arnaldo Carvalho de Meloddc7b8e2007-03-15 21:42:27 -0300901 skb_put(skb, ihl);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300902 skb_copy_to_linear_data(skb, pkt->data, ihl);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700903 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
904 msg = (struct igmpmsg *)skb_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 msg->im_vif = vifi;
Eric Dumazetadf30902009-06-02 05:19:30 +0000906 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
908 /*
909 * Add our header
910 */
911
Jianjun Kongc354e122008-11-03 00:28:02 -0800912 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 igmp->type =
914 msg->im_msgtype = assert;
915 igmp->code = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700916 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700917 skb->transport_header = skb->network_header;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900918 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919
Patrick McHardy0c122952010-04-13 05:03:22 +0000920 if (mrt->mroute_sk == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 kfree_skb(skb);
922 return -EINVAL;
923 }
924
925 /*
926 * Deliver to mrouted
927 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000928 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
Benjamin Thery70a269e2009-01-22 04:56:15 +0000929 if (ret < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 if (net_ratelimit())
931 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
932 kfree_skb(skb);
933 }
934
935 return ret;
936}
937
938/*
939 * Queue a packet for resolution. It gets locked cache entry!
940 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900941
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942static int
Patrick McHardy0c122952010-04-13 05:03:22 +0000943ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944{
Patrick McHardy862465f2010-04-13 05:03:21 +0000945 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 int err;
947 struct mfc_cache *c;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700948 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
950 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +0000951 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +0000952 if (c->mfc_mcastgrp == iph->daddr &&
Patrick McHardy862465f2010-04-13 05:03:21 +0000953 c->mfc_origin == iph->saddr) {
954 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 break;
Patrick McHardy862465f2010-04-13 05:03:21 +0000956 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 }
958
Patrick McHardy862465f2010-04-13 05:03:21 +0000959 if (!found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 /*
961 * Create a new entry if allowable
962 */
963
Patrick McHardy0c122952010-04-13 05:03:22 +0000964 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
Patrick McHardyd658f8a2010-04-13 05:03:20 +0000965 (c = ipmr_cache_alloc_unres()) == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 spin_unlock_bh(&mfc_unres_lock);
967
968 kfree_skb(skb);
969 return -ENOBUFS;
970 }
971
972 /*
973 * Fill in the new cache entry
974 */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700975 c->mfc_parent = -1;
976 c->mfc_origin = iph->saddr;
977 c->mfc_mcastgrp = iph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
979 /*
980 * Reflect first query at mrouted.
981 */
Patrick McHardy0c122952010-04-13 05:03:22 +0000982 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
Benjamin Thery4feb88e2009-01-22 04:56:23 +0000983 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900984 /* If the report failed throw the cache entry
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 out - Brad Parker
986 */
987 spin_unlock_bh(&mfc_unres_lock);
988
Benjamin Thery5c0a66f2009-01-22 04:56:17 +0000989 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 kfree_skb(skb);
991 return err;
992 }
993
Patrick McHardy0c122952010-04-13 05:03:22 +0000994 atomic_inc(&mrt->cache_resolve_queue_len);
995 list_add(&c->list, &mrt->mfc_unres_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
Patrick McHardy0c122952010-04-13 05:03:22 +0000997 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999
1000 /*
1001 * See if we can append the packet
1002 */
1003 if (c->mfc_un.unres.unresolved.qlen>3) {
1004 kfree_skb(skb);
1005 err = -ENOBUFS;
1006 } else {
Jianjun Kongc354e122008-11-03 00:28:02 -08001007 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 err = 0;
1009 }
1010
1011 spin_unlock_bh(&mfc_unres_lock);
1012 return err;
1013}
1014
1015/*
1016 * MFC cache manipulation by user space mroute daemon
1017 */
1018
Patrick McHardy0c122952010-04-13 05:03:22 +00001019static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
1021 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001022 struct mfc_cache *c, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023
Jianjun Kongc354e122008-11-03 00:28:02 -08001024 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025
Patrick McHardy0c122952010-04-13 05:03:22 +00001026 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1028 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1029 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001030 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 write_unlock_bh(&mrt_lock);
1032
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001033 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 return 0;
1035 }
1036 }
1037 return -ENOENT;
1038}
1039
Patrick McHardy0c122952010-04-13 05:03:22 +00001040static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1041 struct mfcctl *mfc, int mrtsock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042{
Patrick McHardy862465f2010-04-13 05:03:21 +00001043 bool found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 int line;
Patrick McHardy862465f2010-04-13 05:03:21 +00001045 struct mfc_cache *uc, *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
Patrick McHardya50436f22010-03-17 06:04:14 +00001047 if (mfc->mfcc_parent >= MAXVIFS)
1048 return -ENFILE;
1049
Jianjun Kongc354e122008-11-03 00:28:02 -08001050 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Patrick McHardy0c122952010-04-13 05:03:22 +00001052 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
Patrick McHardy862465f2010-04-13 05:03:21 +00001054 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1055 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 break;
Patrick McHardy862465f2010-04-13 05:03:21 +00001057 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 }
1059
Patrick McHardy862465f2010-04-13 05:03:21 +00001060 if (found) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 write_lock_bh(&mrt_lock);
1062 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001063 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 if (!mrtsock)
1065 c->mfc_flags |= MFC_STATIC;
1066 write_unlock_bh(&mrt_lock);
1067 return 0;
1068 }
1069
Joe Perchesf97c1e02007-12-16 13:45:43 -08001070 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 return -EINVAL;
1072
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001073 c = ipmr_cache_alloc();
Jianjun Kongc354e122008-11-03 00:28:02 -08001074 if (c == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 return -ENOMEM;
1076
Jianjun Kongc354e122008-11-03 00:28:02 -08001077 c->mfc_origin = mfc->mfcc_origin.s_addr;
1078 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1079 c->mfc_parent = mfc->mfcc_parent;
Patrick McHardy0c122952010-04-13 05:03:22 +00001080 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (!mrtsock)
1082 c->mfc_flags |= MFC_STATIC;
1083
1084 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001085 list_add(&c->list, &mrt->mfc_cache_array[line]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 write_unlock_bh(&mrt_lock);
1087
1088 /*
1089 * Check to see if we resolved a queued list. If so we
1090 * need to send on the frames and tidy up.
1091 */
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001092 found = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001094 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
Patrick McHardye258beb2010-04-13 05:03:19 +00001095 if (uc->mfc_origin == c->mfc_origin &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001097 list_del(&uc->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001098 atomic_dec(&mrt->cache_resolve_queue_len);
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001099 found = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 break;
1101 }
1102 }
Patrick McHardy0c122952010-04-13 05:03:22 +00001103 if (list_empty(&mrt->mfc_unres_queue))
1104 del_timer(&mrt->ipmr_expire_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 spin_unlock_bh(&mfc_unres_lock);
1106
Patrick McHardyb0ebb732010-04-15 13:29:28 +02001107 if (found) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001108 ipmr_cache_resolve(net, mrt, uc, c);
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001109 ipmr_cache_free(uc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 }
1111 return 0;
1112}
1113
1114/*
1115 * Close the multicast socket, and clear the vif tables etc
1116 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001117
Patrick McHardy0c122952010-04-13 05:03:22 +00001118static void mroute_clean_tables(struct mr_table *mrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119{
1120 int i;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001121 LIST_HEAD(list);
Patrick McHardy862465f2010-04-13 05:03:21 +00001122 struct mfc_cache *c, *next;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001123
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 /*
1125 * Shut down all active vif entries
1126 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001127 for (i = 0; i < mrt->maxvif; i++) {
1128 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1129 vif_delete(mrt, i, 0, &list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001131 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132
1133 /*
1134 * Wipe the cache
1135 */
Patrick McHardy862465f2010-04-13 05:03:21 +00001136 for (i = 0; i < MFC_LINES; i++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001137 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001138 if (c->mfc_flags&MFC_STATIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 write_lock_bh(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00001141 list_del(&c->list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 write_unlock_bh(&mrt_lock);
1143
Benjamin Thery5c0a66f2009-01-22 04:56:17 +00001144 ipmr_cache_free(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 }
1146 }
1147
Patrick McHardy0c122952010-04-13 05:03:22 +00001148 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001150 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
Patrick McHardy862465f2010-04-13 05:03:21 +00001151 list_del(&c->list);
Patrick McHardy0c122952010-04-13 05:03:22 +00001152 ipmr_destroy_unres(mrt, c);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 }
1154 spin_unlock_bh(&mfc_unres_lock);
1155 }
1156}
1157
1158static void mrtsock_destruct(struct sock *sk)
1159{
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001160 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001161 struct mr_table *mrt;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001162
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 rtnl_lock();
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001164 ipmr_for_each_table(mrt, net) {
1165 if (sk == mrt->mroute_sk) {
1166 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001168 write_lock_bh(&mrt_lock);
1169 mrt->mroute_sk = NULL;
1170 write_unlock_bh(&mrt_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001172 mroute_clean_tables(mrt);
1173 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 }
1175 rtnl_unlock();
1176}
1177
1178/*
1179 * Socket options and virtual interface manipulation. The whole
1180 * virtual interface system is a complete heap, but unfortunately
1181 * that's how BSD mrouted happens to think. Maybe one day with a proper
1182 * MOSPF/PIM router set up we can clean this up.
1183 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001184
David S. Millerb7058842009-09-30 16:12:20 -07001185int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186{
1187 int ret;
1188 struct vifctl vif;
1189 struct mfcctl mfc;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001190 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001191 struct mr_table *mrt;
1192
1193 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1194 if (mrt == NULL)
1195 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001196
Stephen Hemminger132adf52007-03-08 20:44:43 -08001197 if (optname != MRT_INIT) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001198 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 return -EACCES;
1200 }
1201
Stephen Hemminger132adf52007-03-08 20:44:43 -08001202 switch (optname) {
1203 case MRT_INIT:
1204 if (sk->sk_type != SOCK_RAW ||
Eric Dumazetc720c7e82009-10-15 06:30:45 +00001205 inet_sk(sk)->inet_num != IPPROTO_IGMP)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001206 return -EOPNOTSUPP;
Jianjun Kongc354e122008-11-03 00:28:02 -08001207 if (optlen != sizeof(int))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001208 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
Stephen Hemminger132adf52007-03-08 20:44:43 -08001210 rtnl_lock();
Patrick McHardy0c122952010-04-13 05:03:22 +00001211 if (mrt->mroute_sk) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 rtnl_unlock();
Stephen Hemminger132adf52007-03-08 20:44:43 -08001213 return -EADDRINUSE;
1214 }
1215
1216 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1217 if (ret == 0) {
1218 write_lock_bh(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001219 mrt->mroute_sk = sk;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001220 write_unlock_bh(&mrt_lock);
1221
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001222 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001223 }
1224 rtnl_unlock();
1225 return ret;
1226 case MRT_DONE:
Patrick McHardy0c122952010-04-13 05:03:22 +00001227 if (sk != mrt->mroute_sk)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001228 return -EACCES;
1229 return ip_ra_control(sk, 0, NULL);
1230 case MRT_ADD_VIF:
1231 case MRT_DEL_VIF:
Jianjun Kongc354e122008-11-03 00:28:02 -08001232 if (optlen != sizeof(vif))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001233 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001234 if (copy_from_user(&vif, optval, sizeof(vif)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001235 return -EFAULT;
1236 if (vif.vifc_vifi >= MAXVIFS)
1237 return -ENFILE;
1238 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001239 if (optname == MRT_ADD_VIF) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001240 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001241 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001242 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001243 }
1244 rtnl_unlock();
1245 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
1247 /*
1248 * Manipulate the forwarding caches. These live
1249 * in a sort of kernel/user symbiosis.
1250 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001251 case MRT_ADD_MFC:
1252 case MRT_DEL_MFC:
Jianjun Kongc354e122008-11-03 00:28:02 -08001253 if (optlen != sizeof(mfc))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001254 return -EINVAL;
Jianjun Kongc354e122008-11-03 00:28:02 -08001255 if (copy_from_user(&mfc, optval, sizeof(mfc)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001256 return -EFAULT;
1257 rtnl_lock();
Jianjun Kongc354e122008-11-03 00:28:02 -08001258 if (optname == MRT_DEL_MFC)
Patrick McHardy0c122952010-04-13 05:03:22 +00001259 ret = ipmr_mfc_delete(mrt, &mfc);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001260 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001261 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001262 rtnl_unlock();
1263 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 /*
1265 * Control PIM assert.
1266 */
Stephen Hemminger132adf52007-03-08 20:44:43 -08001267 case MRT_ASSERT:
1268 {
1269 int v;
1270 if (get_user(v,(int __user *)optval))
1271 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001272 mrt->mroute_do_assert = (v) ? 1 : 0;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001273 return 0;
1274 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275#ifdef CONFIG_IP_PIMSM
Stephen Hemminger132adf52007-03-08 20:44:43 -08001276 case MRT_PIM:
1277 {
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001278 int v;
1279
Stephen Hemminger132adf52007-03-08 20:44:43 -08001280 if (get_user(v,(int __user *)optval))
1281 return -EFAULT;
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08001282 v = (v) ? 1 : 0;
1283
Stephen Hemminger132adf52007-03-08 20:44:43 -08001284 rtnl_lock();
1285 ret = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001286 if (v != mrt->mroute_do_pim) {
1287 mrt->mroute_do_pim = v;
1288 mrt->mroute_do_assert = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 }
Stephen Hemminger132adf52007-03-08 20:44:43 -08001290 rtnl_unlock();
1291 return ret;
1292 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001294#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1295 case MRT_TABLE:
1296 {
1297 u32 v;
1298
1299 if (optlen != sizeof(u32))
1300 return -EINVAL;
1301 if (get_user(v, (u32 __user *)optval))
1302 return -EFAULT;
1303 if (sk == mrt->mroute_sk)
1304 return -EBUSY;
1305
1306 rtnl_lock();
1307 ret = 0;
1308 if (!ipmr_new_table(net, v))
1309 ret = -ENOMEM;
1310 raw_sk(sk)->ipmr_table = v;
1311 rtnl_unlock();
1312 return ret;
1313 }
1314#endif
Stephen Hemminger132adf52007-03-08 20:44:43 -08001315 /*
1316 * Spurious command, or MRT_VERSION which you cannot
1317 * set.
1318 */
1319 default:
1320 return -ENOPROTOOPT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 }
1322}
1323
1324/*
1325 * Getsock opt support for the multicast routing system.
1326 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001327
Jianjun Kongc354e122008-11-03 00:28:02 -08001328int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329{
1330 int olr;
1331 int val;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001332 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001333 struct mr_table *mrt;
1334
1335 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1336 if (mrt == NULL)
1337 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
Jianjun Kongc354e122008-11-03 00:28:02 -08001339 if (optname != MRT_VERSION &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340#ifdef CONFIG_IP_PIMSM
1341 optname!=MRT_PIM &&
1342#endif
1343 optname!=MRT_ASSERT)
1344 return -ENOPROTOOPT;
1345
1346 if (get_user(olr, optlen))
1347 return -EFAULT;
1348
1349 olr = min_t(unsigned int, olr, sizeof(int));
1350 if (olr < 0)
1351 return -EINVAL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001352
Jianjun Kongc354e122008-11-03 00:28:02 -08001353 if (put_user(olr, optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 return -EFAULT;
Jianjun Kongc354e122008-11-03 00:28:02 -08001355 if (optname == MRT_VERSION)
1356 val = 0x0305;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357#ifdef CONFIG_IP_PIMSM
Jianjun Kongc354e122008-11-03 00:28:02 -08001358 else if (optname == MRT_PIM)
Patrick McHardy0c122952010-04-13 05:03:22 +00001359 val = mrt->mroute_do_pim;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360#endif
1361 else
Patrick McHardy0c122952010-04-13 05:03:22 +00001362 val = mrt->mroute_do_assert;
Jianjun Kongc354e122008-11-03 00:28:02 -08001363 if (copy_to_user(optval, &val, olr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 return -EFAULT;
1365 return 0;
1366}
1367
1368/*
1369 * The IP multicast ioctl support routines.
1370 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001371
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1373{
1374 struct sioc_sg_req sr;
1375 struct sioc_vif_req vr;
1376 struct vif_device *vif;
1377 struct mfc_cache *c;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001378 struct net *net = sock_net(sk);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001379 struct mr_table *mrt;
1380
1381 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1382 if (mrt == NULL)
1383 return -ENOENT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001384
Stephen Hemminger132adf52007-03-08 20:44:43 -08001385 switch (cmd) {
1386 case SIOCGETVIFCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001387 if (copy_from_user(&vr, arg, sizeof(vr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001388 return -EFAULT;
Patrick McHardy0c122952010-04-13 05:03:22 +00001389 if (vr.vifi >= mrt->maxvif)
Stephen Hemminger132adf52007-03-08 20:44:43 -08001390 return -EINVAL;
1391 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001392 vif = &mrt->vif_table[vr.vifi];
1393 if (VIF_EXISTS(mrt, vr.vifi)) {
Jianjun Kongc354e122008-11-03 00:28:02 -08001394 vr.icount = vif->pkt_in;
1395 vr.ocount = vif->pkt_out;
1396 vr.ibytes = vif->bytes_in;
1397 vr.obytes = vif->bytes_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001399
Jianjun Kongc354e122008-11-03 00:28:02 -08001400 if (copy_to_user(arg, &vr, sizeof(vr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401 return -EFAULT;
Stephen Hemminger132adf52007-03-08 20:44:43 -08001402 return 0;
1403 }
1404 read_unlock(&mrt_lock);
1405 return -EADDRNOTAVAIL;
1406 case SIOCGETSGCNT:
Jianjun Kongc354e122008-11-03 00:28:02 -08001407 if (copy_from_user(&sr, arg, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001408 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409
Stephen Hemminger132adf52007-03-08 20:44:43 -08001410 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001411 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001412 if (c) {
1413 sr.pktcnt = c->mfc_un.res.pkt;
1414 sr.bytecnt = c->mfc_un.res.bytes;
1415 sr.wrong_if = c->mfc_un.res.wrong_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 read_unlock(&mrt_lock);
Stephen Hemminger132adf52007-03-08 20:44:43 -08001417
Jianjun Kongc354e122008-11-03 00:28:02 -08001418 if (copy_to_user(arg, &sr, sizeof(sr)))
Stephen Hemminger132adf52007-03-08 20:44:43 -08001419 return -EFAULT;
1420 return 0;
1421 }
1422 read_unlock(&mrt_lock);
1423 return -EADDRNOTAVAIL;
1424 default:
1425 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 }
1427}
1428
1429
1430static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1431{
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001432 struct net_device *dev = ptr;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001433 struct net *net = dev_net(dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001434 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 struct vif_device *v;
1436 int ct;
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001437 LIST_HEAD(list);
Eric W. Biedermane9dc8652007-09-12 13:02:17 +02001438
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 if (event != NETDEV_UNREGISTER)
1440 return NOTIFY_DONE;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001441
1442 ipmr_for_each_table(mrt, net) {
1443 v = &mrt->vif_table[0];
1444 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1445 if (v->dev == dev)
1446 vif_delete(mrt, ct, 1, &list);
1447 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 }
Eric Dumazetd17fa6f2009-10-28 05:21:38 +00001449 unregister_netdevice_many(&list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 return NOTIFY_DONE;
1451}
1452
1453
Jianjun Kongc354e122008-11-03 00:28:02 -08001454static struct notifier_block ip_mr_notifier = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 .notifier_call = ipmr_device_event,
1456};
1457
1458/*
1459 * Encapsulate a packet by attaching a valid IPIP header to it.
1460 * This avoids tunnel drivers and other mess and gives us the speed so
1461 * important for multicast video.
1462 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001463
Al Viro114c7842006-09-27 18:39:29 -07001464static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465{
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001466 struct iphdr *iph;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001467 struct iphdr *old_iph = ip_hdr(skb);
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001468
1469 skb_push(skb, sizeof(struct iphdr));
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001470 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Melo8856dfa2007-03-10 19:40:39 -03001471 skb_reset_network_header(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001472 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
1474 iph->version = 4;
Arnaldo Carvalho de Meloe023dd62007-03-12 20:09:36 -03001475 iph->tos = old_iph->tos;
1476 iph->ttl = old_iph->ttl;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 iph->frag_off = 0;
1478 iph->daddr = daddr;
1479 iph->saddr = saddr;
1480 iph->protocol = IPPROTO_IPIP;
1481 iph->ihl = 5;
1482 iph->tot_len = htons(skb->len);
Eric Dumazetadf30902009-06-02 05:19:30 +00001483 ip_select_ident(iph, skb_dst(skb), NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 ip_send_check(iph);
1485
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1487 nf_reset(skb);
1488}
1489
1490static inline int ipmr_forward_finish(struct sk_buff *skb)
1491{
1492 struct ip_options * opt = &(IPCB(skb)->opt);
1493
Eric Dumazetadf30902009-06-02 05:19:30 +00001494 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
1496 if (unlikely(opt->optlen))
1497 ip_forward_options(skb);
1498
1499 return dst_output(skb);
1500}
1501
1502/*
1503 * Processing handlers for ipmr_forward
1504 */
1505
Patrick McHardy0c122952010-04-13 05:03:22 +00001506static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1507 struct sk_buff *skb, struct mfc_cache *c, int vifi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001509 const struct iphdr *iph = ip_hdr(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001510 struct vif_device *vif = &mrt->vif_table[vifi];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 struct net_device *dev;
1512 struct rtable *rt;
1513 int encap = 0;
1514
1515 if (vif->dev == NULL)
1516 goto out_free;
1517
1518#ifdef CONFIG_IP_PIMSM
1519 if (vif->flags & VIFF_REGISTER) {
1520 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001521 vif->bytes_out += skb->len;
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001522 vif->dev->stats.tx_bytes += skb->len;
1523 vif->dev->stats.tx_packets++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001524 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
Ilpo Järvinen69ebbf52009-02-06 23:46:51 -08001525 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526 }
1527#endif
1528
1529 if (vif->flags&VIFF_TUNNEL) {
1530 struct flowi fl = { .oif = vif->link,
1531 .nl_u = { .ip4_u =
1532 { .daddr = vif->remote,
1533 .saddr = vif->local,
1534 .tos = RT_TOS(iph->tos) } },
1535 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001536 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 goto out_free;
1538 encap = sizeof(struct iphdr);
1539 } else {
1540 struct flowi fl = { .oif = vif->link,
1541 .nl_u = { .ip4_u =
1542 { .daddr = iph->daddr,
1543 .tos = RT_TOS(iph->tos) } },
1544 .proto = IPPROTO_IPIP };
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001545 if (ip_route_output_key(net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 goto out_free;
1547 }
1548
1549 dev = rt->u.dst.dev;
1550
1551 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1552 /* Do not fragment multicasts. Alas, IPv4 does not
1553 allow to send ICMP, so that packets will disappear
1554 to blackhole.
1555 */
1556
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -07001557 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 ip_rt_put(rt);
1559 goto out_free;
1560 }
1561
1562 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1563
1564 if (skb_cow(skb, encap)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001565 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 goto out_free;
1567 }
1568
1569 vif->pkt_out++;
Jianjun Kongc354e122008-11-03 00:28:02 -08001570 vif->bytes_out += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571
Eric Dumazetadf30902009-06-02 05:19:30 +00001572 skb_dst_drop(skb);
1573 skb_dst_set(skb, &rt->u.dst);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001574 ip_decrease_ttl(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575
1576 /* FIXME: forward and output firewalls used to be called here.
1577 * What do we do with netfilter? -- RR */
1578 if (vif->flags & VIFF_TUNNEL) {
1579 ip_encap(skb, vif->local, vif->remote);
1580 /* FIXME: extra output firewall step used to be here. --RR */
Pavel Emelyanov2f4c02d2008-05-21 14:16:14 -07001581 vif->dev->stats.tx_packets++;
1582 vif->dev->stats.tx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 }
1584
1585 IPCB(skb)->flags |= IPSKB_FORWARDED;
1586
1587 /*
1588 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1589 * not only before forwarding, but after forwarding on all output
1590 * interfaces. It is clear, if mrouter runs a multicasting
1591 * program, it should receive packets not depending to what interface
1592 * program is joined.
1593 * If we will not make it, the program will have to join on all
1594 * interfaces. On the other hand, multihoming host (or router, but
1595 * not mrouter) cannot join to more than one interface - it will
1596 * result in receiving multiple packets.
1597 */
Patrick McHardy6e23ae22007-11-19 18:53:30 -08001598 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 ipmr_forward_finish);
1600 return;
1601
1602out_free:
1603 kfree_skb(skb);
1604 return;
1605}
1606
Patrick McHardy0c122952010-04-13 05:03:22 +00001607static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608{
1609 int ct;
Patrick McHardy0c122952010-04-13 05:03:22 +00001610
1611 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1612 if (mrt->vif_table[ct].dev == dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 break;
1614 }
1615 return ct;
1616}
1617
1618/* "local" means that we should preserve one skb (for local delivery) */
1619
Patrick McHardy0c122952010-04-13 05:03:22 +00001620static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1621 struct sk_buff *skb, struct mfc_cache *cache,
1622 int local)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623{
1624 int psend = -1;
1625 int vif, ct;
1626
1627 vif = cache->mfc_parent;
1628 cache->mfc_un.res.pkt++;
1629 cache->mfc_un.res.bytes += skb->len;
1630
1631 /*
1632 * Wrong interface: drop packet and (maybe) send PIM assert.
1633 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001634 if (mrt->vif_table[vif].dev != skb->dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 int true_vifi;
1636
Eric Dumazet511c3f92009-06-02 05:14:27 +00001637 if (skb_rtable(skb)->fl.iif == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 /* It is our own packet, looped back.
1639 Very complicated situation...
1640
1641 The best workaround until routing daemons will be
1642 fixed is not to redistribute packet, if it was
1643 send through wrong interface. It means, that
1644 multicast applications WILL NOT work for
1645 (S,G), which have default multicast route pointing
1646 to wrong oif. In any case, it is not a good
1647 idea to use multicasting applications on router.
1648 */
1649 goto dont_forward;
1650 }
1651
1652 cache->mfc_un.res.wrong_if++;
Patrick McHardy0c122952010-04-13 05:03:22 +00001653 true_vifi = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654
Patrick McHardy0c122952010-04-13 05:03:22 +00001655 if (true_vifi >= 0 && mrt->mroute_do_assert &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 /* pimsm uses asserts, when switching from RPT to SPT,
1657 so that we cannot check that packet arrived on an oif.
1658 It is bad, but otherwise we would need to move pretty
1659 large chunk of pimd to kernel. Ough... --ANK
1660 */
Patrick McHardy0c122952010-04-13 05:03:22 +00001661 (mrt->mroute_do_pim ||
Benjamin Thery6f9374a2009-01-22 04:56:20 +00001662 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001663 time_after(jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1665 cache->mfc_un.res.last_assert = jiffies;
Patrick McHardy0c122952010-04-13 05:03:22 +00001666 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 }
1668 goto dont_forward;
1669 }
1670
Patrick McHardy0c122952010-04-13 05:03:22 +00001671 mrt->vif_table[vif].pkt_in++;
1672 mrt->vif_table[vif].bytes_in += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673
1674 /*
1675 * Forward the frame
1676 */
1677 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001678 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 if (psend != -1) {
1680 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1681 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001682 ipmr_queue_xmit(net, mrt, skb2, cache,
1683 psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 }
Jianjun Kongc354e122008-11-03 00:28:02 -08001685 psend = ct;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 }
1687 }
1688 if (psend != -1) {
1689 if (local) {
1690 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1691 if (skb2)
Patrick McHardy0c122952010-04-13 05:03:22 +00001692 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 } else {
Patrick McHardy0c122952010-04-13 05:03:22 +00001694 ipmr_queue_xmit(net, mrt, skb, cache, psend);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 return 0;
1696 }
1697 }
1698
1699dont_forward:
1700 if (!local)
1701 kfree_skb(skb);
1702 return 0;
1703}
1704
1705
1706/*
1707 * Multicast packets for forwarding arrive here
1708 */
1709
1710int ip_mr_input(struct sk_buff *skb)
1711{
1712 struct mfc_cache *cache;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001713 struct net *net = dev_net(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +00001714 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001715 struct mr_table *mrt;
1716 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717
1718 /* Packet is looped back after forward, it should not be
1719 forwarded second time, but still can be delivered locally.
1720 */
1721 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1722 goto dont_forward;
1723
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001724 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1725 if (err < 0)
1726 return err;
1727
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 if (!local) {
1729 if (IPCB(skb)->opt.router_alert) {
1730 if (ip_call_ra_chain(skb))
1731 return 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001732 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 /* IGMPv1 (and broken IGMPv2 implementations sort of
1734 Cisco IOS <= 11.2(8)) do not put router alert
1735 option to IGMP packets destined to routable
1736 groups. It is very bad, because it means
1737 that we can forward NO IGMP messages.
1738 */
1739 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001740 if (mrt->mroute_sk) {
Patrick McHardy2715bcf2005-06-21 14:06:24 -07001741 nf_reset(skb);
Patrick McHardy0c122952010-04-13 05:03:22 +00001742 raw_rcv(mrt->mroute_sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 read_unlock(&mrt_lock);
1744 return 0;
1745 }
1746 read_unlock(&mrt_lock);
1747 }
1748 }
1749
1750 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001751 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752
1753 /*
1754 * No usable cache entry
1755 */
Jianjun Kongc354e122008-11-03 00:28:02 -08001756 if (cache == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 int vif;
1758
1759 if (local) {
1760 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1761 ip_local_deliver(skb);
1762 if (skb2 == NULL) {
1763 read_unlock(&mrt_lock);
1764 return -ENOBUFS;
1765 }
1766 skb = skb2;
1767 }
1768
Patrick McHardy0c122952010-04-13 05:03:22 +00001769 vif = ipmr_find_vif(mrt, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 if (vif >= 0) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001771 int err = ipmr_cache_unresolved(mrt, vif, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 read_unlock(&mrt_lock);
1773
1774 return err;
1775 }
1776 read_unlock(&mrt_lock);
1777 kfree_skb(skb);
1778 return -ENODEV;
1779 }
1780
Patrick McHardy0c122952010-04-13 05:03:22 +00001781 ip_mr_forward(net, mrt, skb, cache, local);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
1783 read_unlock(&mrt_lock);
1784
1785 if (local)
1786 return ip_local_deliver(skb);
1787
1788 return 0;
1789
1790dont_forward:
1791 if (local)
1792 return ip_local_deliver(skb);
1793 kfree_skb(skb);
1794 return 0;
1795}
1796
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001797#ifdef CONFIG_IP_PIMSM
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001798static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1799 unsigned int pimlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800{
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001801 struct net_device *reg_dev = NULL;
1802 struct iphdr *encap;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001804 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 /*
1806 Check that:
1807 a. packet is really destinted to a multicast group
1808 b. packet is not a NULL-REGISTER
1809 c. packet is not truncated
1810 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001811 if (!ipv4_is_multicast(encap->daddr) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 encap->tot_len == 0 ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001813 ntohs(encap->tot_len) + pimlen > skb->len)
1814 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815
1816 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001817 if (mrt->mroute_reg_vif_num >= 0)
1818 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 if (reg_dev)
1820 dev_hold(reg_dev);
1821 read_unlock(&mrt_lock);
1822
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001823 if (reg_dev == NULL)
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001824 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001826 skb->mac_header = skb->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 skb_pull(skb, (u8*)encap - skb->data);
Arnaldo Carvalho de Melo31c77112007-03-10 19:04:55 -03001828 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 skb->dev = reg_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 skb->protocol = htons(ETH_P_IP);
1831 skb->ip_summed = 0;
1832 skb->pkt_type = PACKET_HOST;
Eric Dumazetadf30902009-06-02 05:19:30 +00001833 skb_dst_drop(skb);
Pavel Emelyanovcf3677a2008-05-21 14:17:33 -07001834 reg_dev->stats.rx_bytes += skb->len;
1835 reg_dev->stats.rx_packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 nf_reset(skb);
1837 netif_rx(skb);
1838 dev_put(reg_dev);
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001839
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 return 0;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001841}
1842#endif
1843
1844#ifdef CONFIG_IP_PIMSM_V1
1845/*
1846 * Handle IGMP messages of PIMv1
1847 */
1848
1849int pim_rcv_v1(struct sk_buff * skb)
1850{
1851 struct igmphdr *pim;
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001852 struct net *net = dev_net(skb->dev);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001853 struct mr_table *mrt;
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001854
1855 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1856 goto drop;
1857
1858 pim = igmp_hdr(skb);
1859
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001860 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1861 goto drop;
1862
Patrick McHardy0c122952010-04-13 05:03:22 +00001863 if (!mrt->mroute_do_pim ||
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001864 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1865 goto drop;
1866
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001867 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001868drop:
1869 kfree_skb(skb);
1870 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 return 0;
1872}
1873#endif
1874
1875#ifdef CONFIG_IP_PIMSM_V2
1876static int pim_rcv(struct sk_buff * skb)
1877{
1878 struct pimreghdr *pim;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001879 struct net *net = dev_net(skb->dev);
1880 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001882 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 goto drop;
1884
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001885 pim = (struct pimreghdr *)skb_transport_header(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001886 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 (pim->flags&PIM_NULL_REGISTER) ||
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001888 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Virod3bc23e2006-11-14 21:24:49 -08001889 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 goto drop;
1891
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001892 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1893 goto drop;
1894
1895 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
Ilpo Järvinenb1879202008-12-16 01:15:11 -08001896drop:
1897 kfree_skb(skb);
1898 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 return 0;
1900}
1901#endif
1902
1903static int
Patrick McHardy0c122952010-04-13 05:03:22 +00001904ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
Patrick McHardyd658f8a2010-04-13 05:03:20 +00001905 struct rtmsg *rtm)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906{
1907 int ct;
1908 struct rtnexthop *nhp;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001909 u8 *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 struct rtattr *mp_head;
1911
Nicolas Dichtel74381892010-03-25 23:45:35 +00001912 /* If cache is unresolved, don't try to parse IIF and OIF */
1913 if (c->mfc_parent > MAXVIFS)
1914 return -ENOENT;
1915
Patrick McHardy0c122952010-04-13 05:03:22 +00001916 if (VIF_EXISTS(mrt, c->mfc_parent))
1917 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918
Jianjun Kongc354e122008-11-03 00:28:02 -08001919 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920
1921 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00001922 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1924 goto rtattr_failure;
Jianjun Kongc354e122008-11-03 00:28:02 -08001925 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926 nhp->rtnh_flags = 0;
1927 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Patrick McHardy0c122952010-04-13 05:03:22 +00001928 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 nhp->rtnh_len = sizeof(*nhp);
1930 }
1931 }
1932 mp_head->rta_type = RTA_MULTIPATH;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001933 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 rtm->rtm_type = RTN_MULTICAST;
1935 return 1;
1936
1937rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001938 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 return -EMSGSIZE;
1940}
1941
Benjamin Thery4feb88e2009-01-22 04:56:23 +00001942int ipmr_get_route(struct net *net,
1943 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944{
1945 int err;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001946 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 struct mfc_cache *cache;
Eric Dumazet511c3f92009-06-02 05:14:27 +00001948 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949
Patrick McHardyf0ad0862010-04-13 05:03:23 +00001950 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1951 if (mrt == NULL)
1952 return -ENOENT;
1953
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 read_lock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00001955 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
Jianjun Kongc354e122008-11-03 00:28:02 -08001957 if (cache == NULL) {
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001958 struct sk_buff *skb2;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001959 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 struct net_device *dev;
1961 int vif;
1962
1963 if (nowait) {
1964 read_unlock(&mrt_lock);
1965 return -EAGAIN;
1966 }
1967
1968 dev = skb->dev;
Patrick McHardy0c122952010-04-13 05:03:22 +00001969 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 read_unlock(&mrt_lock);
1971 return -ENODEV;
1972 }
Alexey Kuznetsov72287492006-07-25 16:45:12 -07001973 skb2 = skb_clone(skb, GFP_ATOMIC);
1974 if (!skb2) {
1975 read_unlock(&mrt_lock);
1976 return -ENOMEM;
1977 }
1978
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -07001979 skb_push(skb2, sizeof(struct iphdr));
1980 skb_reset_network_header(skb2);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001981 iph = ip_hdr(skb2);
1982 iph->ihl = sizeof(struct iphdr) >> 2;
1983 iph->saddr = rt->rt_src;
1984 iph->daddr = rt->rt_dst;
1985 iph->version = 0;
Patrick McHardy0c122952010-04-13 05:03:22 +00001986 err = ipmr_cache_unresolved(mrt, vif, skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987 read_unlock(&mrt_lock);
1988 return err;
1989 }
1990
1991 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1992 cache->mfc_flags |= MFC_NOTIFY;
Patrick McHardy0c122952010-04-13 05:03:22 +00001993 err = ipmr_fill_mroute(mrt, skb, cache, rtm);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 read_unlock(&mrt_lock);
1995 return err;
1996}
1997
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001998#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999/*
2000 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2001 */
2002struct ipmr_vif_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002003 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002004 struct mr_table *mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 int ct;
2006};
2007
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002008static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2009 struct ipmr_vif_iter *iter,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 loff_t pos)
2011{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002012 struct mr_table *mrt = iter->mrt;
Patrick McHardy0c122952010-04-13 05:03:22 +00002013
2014 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2015 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 continue;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002017 if (pos-- == 0)
Patrick McHardy0c122952010-04-13 05:03:22 +00002018 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 }
2020 return NULL;
2021}
2022
2023static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002024 __acquires(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002026 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002027 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002028 struct mr_table *mrt;
2029
2030 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2031 if (mrt == NULL)
2032 return ERR_PTR(-ENOENT);
2033
2034 iter->mrt = mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002035
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 read_lock(&mrt_lock);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002037 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002038 : SEQ_START_TOKEN;
2039}
2040
2041static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2042{
2043 struct ipmr_vif_iter *iter = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002044 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002045 struct mr_table *mrt = iter->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046
2047 ++*pos;
2048 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002049 return ipmr_vif_seq_idx(net, iter, 0);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002050
Patrick McHardy0c122952010-04-13 05:03:22 +00002051 while (++iter->ct < mrt->maxvif) {
2052 if (!VIF_EXISTS(mrt, iter->ct))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 continue;
Patrick McHardy0c122952010-04-13 05:03:22 +00002054 return &mrt->vif_table[iter->ct];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 }
2056 return NULL;
2057}
2058
2059static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerba93ef72008-01-21 17:28:59 -08002060 __releases(mrt_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061{
2062 read_unlock(&mrt_lock);
2063}
2064
2065static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2066{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002067 struct ipmr_vif_iter *iter = seq->private;
2068 struct mr_table *mrt = iter->mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002069
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002071 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2073 } else {
2074 const struct vif_device *vif = v;
2075 const char *name = vif->dev ? vif->dev->name : "none";
2076
2077 seq_printf(seq,
2078 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
Patrick McHardy0c122952010-04-13 05:03:22 +00002079 vif - mrt->vif_table,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002080 name, vif->bytes_in, vif->pkt_in,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 vif->bytes_out, vif->pkt_out,
2082 vif->flags, vif->local, vif->remote);
2083 }
2084 return 0;
2085}
2086
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002087static const struct seq_operations ipmr_vif_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 .start = ipmr_vif_seq_start,
2089 .next = ipmr_vif_seq_next,
2090 .stop = ipmr_vif_seq_stop,
2091 .show = ipmr_vif_seq_show,
2092};
2093
2094static int ipmr_vif_open(struct inode *inode, struct file *file)
2095{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002096 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2097 sizeof(struct ipmr_vif_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098}
2099
Arjan van de Ven9a321442007-02-12 00:55:35 -08002100static const struct file_operations ipmr_vif_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002101 .owner = THIS_MODULE,
2102 .open = ipmr_vif_open,
2103 .read = seq_read,
2104 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002105 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106};
2107
2108struct ipmr_mfc_iter {
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002109 struct seq_net_private p;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002110 struct mr_table *mrt;
Patrick McHardy862465f2010-04-13 05:03:21 +00002111 struct list_head *cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112 int ct;
2113};
2114
2115
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002116static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2117 struct ipmr_mfc_iter *it, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002119 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 struct mfc_cache *mfc;
2121
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 read_lock(&mrt_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002123 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002124 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002125 list_for_each_entry(mfc, it->cache, list)
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002126 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 return mfc;
Patrick McHardy862465f2010-04-13 05:03:21 +00002128 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 read_unlock(&mrt_lock);
2130
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002132 it->cache = &mrt->mfc_unres_queue;
Patrick McHardy862465f2010-04-13 05:03:21 +00002133 list_for_each_entry(mfc, it->cache, list)
Patrick McHardye258beb2010-04-13 05:03:19 +00002134 if (pos-- == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 return mfc;
2136 spin_unlock_bh(&mfc_unres_lock);
2137
2138 it->cache = NULL;
2139 return NULL;
2140}
2141
2142
2143static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2144{
2145 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002146 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002147 struct mr_table *mrt;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002148
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002149 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2150 if (mrt == NULL)
2151 return ERR_PTR(-ENOENT);
2152
2153 it->mrt = mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 it->cache = NULL;
2155 it->ct = 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002156 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157 : SEQ_START_TOKEN;
2158}
2159
2160static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2161{
2162 struct mfc_cache *mfc = v;
2163 struct ipmr_mfc_iter *it = seq->private;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002164 struct net *net = seq_file_net(seq);
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002165 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166
2167 ++*pos;
2168
2169 if (v == SEQ_START_TOKEN)
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002170 return ipmr_mfc_seq_idx(net, seq->private, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171
Patrick McHardy862465f2010-04-13 05:03:21 +00002172 if (mfc->list.next != it->cache)
2173 return list_entry(mfc->list.next, struct mfc_cache, list);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002174
Patrick McHardy0c122952010-04-13 05:03:22 +00002175 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176 goto end_of_list;
2177
Patrick McHardy0c122952010-04-13 05:03:22 +00002178 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179
2180 while (++it->ct < MFC_LINES) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002181 it->cache = &mrt->mfc_cache_array[it->ct];
Patrick McHardy862465f2010-04-13 05:03:21 +00002182 if (list_empty(it->cache))
2183 continue;
2184 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185 }
2186
2187 /* exhausted cache_array, show unresolved */
2188 read_unlock(&mrt_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002189 it->cache = &mrt->mfc_unres_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 it->ct = 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002191
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 spin_lock_bh(&mfc_unres_lock);
Patrick McHardy862465f2010-04-13 05:03:21 +00002193 if (!list_empty(it->cache))
2194 return list_first_entry(it->cache, struct mfc_cache, list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195
2196 end_of_list:
2197 spin_unlock_bh(&mfc_unres_lock);
2198 it->cache = NULL;
2199
2200 return NULL;
2201}
2202
2203static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2204{
2205 struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002206 struct mr_table *mrt = it->mrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207
Patrick McHardy0c122952010-04-13 05:03:22 +00002208 if (it->cache == &mrt->mfc_unres_queue)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 spin_unlock_bh(&mfc_unres_lock);
Patrick McHardy0c122952010-04-13 05:03:22 +00002210 else if (it->cache == &mrt->mfc_cache_array[it->ct])
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 read_unlock(&mrt_lock);
2212}
2213
2214static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2215{
2216 int n;
2217
2218 if (v == SEQ_START_TOKEN) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002219 seq_puts(seq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2221 } else {
2222 const struct mfc_cache *mfc = v;
2223 const struct ipmr_mfc_iter *it = seq->private;
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002224 const struct mr_table *mrt = it->mrt;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002225
Benjamin Thery999890b2008-12-03 22:22:16 -08002226 seq_printf(seq, "%08lX %08lX %-3hd",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 (unsigned long) mfc->mfc_mcastgrp,
2228 (unsigned long) mfc->mfc_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002229 mfc->mfc_parent);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230
Patrick McHardy0c122952010-04-13 05:03:22 +00002231 if (it->cache != &mrt->mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002232 seq_printf(seq, " %8lu %8lu %8lu",
2233 mfc->mfc_un.res.pkt,
2234 mfc->mfc_un.res.bytes,
2235 mfc->mfc_un.res.wrong_if);
Stephen Hemminger132adf52007-03-08 20:44:43 -08002236 for (n = mfc->mfc_un.res.minvif;
2237 n < mfc->mfc_un.res.maxvif; n++ ) {
Patrick McHardy0c122952010-04-13 05:03:22 +00002238 if (VIF_EXISTS(mrt, n) &&
Benjamin Therycf958ae32009-01-22 04:56:16 +00002239 mfc->mfc_un.res.ttls[n] < 255)
2240 seq_printf(seq,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002241 " %2d:%-3d",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 n, mfc->mfc_un.res.ttls[n]);
2243 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -08002244 } else {
2245 /* unresolved mfc_caches don't contain
2246 * pkt, bytes and wrong_if values
2247 */
2248 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249 }
2250 seq_putc(seq, '\n');
2251 }
2252 return 0;
2253}
2254
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002255static const struct seq_operations ipmr_mfc_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 .start = ipmr_mfc_seq_start,
2257 .next = ipmr_mfc_seq_next,
2258 .stop = ipmr_mfc_seq_stop,
2259 .show = ipmr_mfc_seq_show,
2260};
2261
2262static int ipmr_mfc_open(struct inode *inode, struct file *file)
2263{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002264 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2265 sizeof(struct ipmr_mfc_iter));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266}
2267
Arjan van de Ven9a321442007-02-12 00:55:35 -08002268static const struct file_operations ipmr_mfc_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 .owner = THIS_MODULE,
2270 .open = ipmr_mfc_open,
2271 .read = seq_read,
2272 .llseek = seq_lseek,
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002273 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002275#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276
#ifdef CONFIG_IP_PIMSM_V2
/* Protocol descriptor that ip_mr_init() registers for IPPROTO_PIM. */
static const struct net_protocol pim_protocol = {
	.netns_ok = 1,
	.handler  = pim_rcv,
};
#endif
2283
2284
2285/*
2286 * Setup for IP multicast routing
2287 */
Benjamin Therycf958ae32009-01-22 04:56:16 +00002288static int __net_init ipmr_net_init(struct net *net)
2289{
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002290 int err;
Benjamin Therycf958ae32009-01-22 04:56:16 +00002291
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002292 err = ipmr_rules_init(net);
2293 if (err < 0)
Benjamin Therycf958ae32009-01-22 04:56:16 +00002294 goto fail;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002295
2296#ifdef CONFIG_PROC_FS
2297 err = -ENOMEM;
2298 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2299 goto proc_vif_fail;
2300 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2301 goto proc_cache_fail;
2302#endif
Benjamin Thery2bb8b262009-01-22 04:56:18 +00002303 return 0;
2304
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002305#ifdef CONFIG_PROC_FS
2306proc_cache_fail:
2307 proc_net_remove(net, "ip_mr_vif");
2308proc_vif_fail:
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002309 ipmr_rules_exit(net);
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002310#endif
Benjamin Therycf958ae32009-01-22 04:56:16 +00002311fail:
2312 return err;
2313}
2314
2315static void __net_exit ipmr_net_exit(struct net *net)
2316{
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002317#ifdef CONFIG_PROC_FS
2318 proc_net_remove(net, "ip_mr_cache");
2319 proc_net_remove(net, "ip_mr_vif");
2320#endif
Patrick McHardyf0ad0862010-04-13 05:03:23 +00002321 ipmr_rules_exit(net);
Benjamin Therycf958ae32009-01-22 04:56:16 +00002322}
2323
2324static struct pernet_operations ipmr_net_ops = {
2325 .init = ipmr_net_init,
2326 .exit = ipmr_net_exit,
2327};
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002328
Wang Chen03d2f892008-07-03 12:13:36 +08002329int __init ip_mr_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330{
Wang Chen03d2f892008-07-03 12:13:36 +08002331 int err;
2332
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2334 sizeof(struct mfc_cache),
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002335 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
Paul Mundt20c2df82007-07-20 10:11:58 +09002336 NULL);
Wang Chen03d2f892008-07-03 12:13:36 +08002337 if (!mrt_cachep)
2338 return -ENOMEM;
2339
Benjamin Therycf958ae32009-01-22 04:56:16 +00002340 err = register_pernet_subsys(&ipmr_net_ops);
2341 if (err)
2342 goto reg_pernet_fail;
2343
Wang Chen03d2f892008-07-03 12:13:36 +08002344 err = register_netdevice_notifier(&ip_mr_notifier);
2345 if (err)
2346 goto reg_notif_fail;
Tom Goff403dbb92009-06-14 03:16:13 -07002347#ifdef CONFIG_IP_PIMSM_V2
2348 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2349 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2350 err = -EAGAIN;
2351 goto add_proto_fail;
2352 }
2353#endif
Wang Chen03d2f892008-07-03 12:13:36 +08002354 return 0;
Benjamin Theryf6bb4512009-01-22 04:56:22 +00002355
Tom Goff403dbb92009-06-14 03:16:13 -07002356#ifdef CONFIG_IP_PIMSM_V2
2357add_proto_fail:
2358 unregister_netdevice_notifier(&ip_mr_notifier);
2359#endif
Benjamin Theryc3e38892008-11-19 14:07:41 -08002360reg_notif_fail:
Benjamin Therycf958ae32009-01-22 04:56:16 +00002361 unregister_pernet_subsys(&ipmr_net_ops);
2362reg_pernet_fail:
Benjamin Theryc3e38892008-11-19 14:07:41 -08002363 kmem_cache_destroy(mrt_cachep);
Wang Chen03d2f892008-07-03 12:13:36 +08002364 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365}