blob: 8e693859ea0337ec94cd52fbe251603e176f69d5 [file] [log] [blame]
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090030#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090033#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090035#include <linux/init.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090036#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090039#include <net/raw.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/notifier.h>
41#include <linux/if_arp.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090042#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090048#include <linux/pim.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090049#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090052/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
Benjamin Thery4e168802008-12-10 16:15:08 -080062#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090063
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090064static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090065
66/* Special spinlock for queue of unresolved entries */
67static DEFINE_SPINLOCK(mfc_unres_lock);
68
69/* We return to original Alan's scheme. Hash table of resolved
70 entries is changed only in process context and protected
71 with weak lock mrt_lock. Queue of unresolved entries is protected
72 with strong spinlock mfc_unres_lock.
73
74 In this case data path is free of exclusive locks at all.
75 */
76
77static struct kmem_cache *mrt_cachep __read_mostly;
78
79static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
Rami Rosen6ac7eb02008-04-10 12:40:10 +030080static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090081static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
82
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090083#ifdef CONFIG_IPV6_PIMSM_V2
84static struct inet6_protocol pim6_protocol;
85#endif
86
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090087static struct timer_list ipmr_expire_timer;
88
89
90#ifdef CONFIG_PROC_FS
91
/* Cursor of the mfc cache /proc walk */
struct ipmr_mfc_iter {
	/*
	 * List currently being walked: the resolved hash array,
	 * &mfc_unres_queue, or NULL when no list (and no lock) is held.
	 */
	struct mfc6_cache **cache;
	/* Current line of the resolved hash array */
	int ct;
};
96
97
98static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
99{
100 struct mfc6_cache *mfc;
101
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800102 it->cache = init_net.ipv6.mfc6_cache_array;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900103 read_lock(&mrt_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800104 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
105 for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
106 mfc; mfc = mfc->next)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900107 if (pos-- == 0)
108 return mfc;
109 read_unlock(&mrt_lock);
110
111 it->cache = &mfc_unres_queue;
112 spin_lock_bh(&mfc_unres_lock);
113 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
114 if (pos-- == 0)
115 return mfc;
116 spin_unlock_bh(&mfc_unres_lock);
117
118 it->cache = NULL;
119 return NULL;
120}
121
122
123
124
125/*
126 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
127 */
128
/* Cursor of the vif table /proc walk */
struct ipmr_vif_iter {
	int ct;		/* index of the current slot in the vif table */
};
132
133static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
134 loff_t pos)
135{
Benjamin Thery4e168802008-12-10 16:15:08 -0800136 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
137 if (!MIF_EXISTS(&init_net, iter->ct))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900138 continue;
139 if (pos-- == 0)
Benjamin Thery4e168802008-12-10 16:15:08 -0800140 return &init_net.ipv6.vif6_table[iter->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900141 }
142 return NULL;
143}
144
145static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
146 __acquires(mrt_lock)
147{
148 read_lock(&mrt_lock);
149 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
150 : SEQ_START_TOKEN);
151}
152
153static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
154{
155 struct ipmr_vif_iter *iter = seq->private;
156
157 ++*pos;
158 if (v == SEQ_START_TOKEN)
159 return ip6mr_vif_seq_idx(iter, 0);
160
Benjamin Thery4e168802008-12-10 16:15:08 -0800161 while (++iter->ct < init_net.ipv6.maxvif) {
162 if (!MIF_EXISTS(&init_net, iter->ct))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900163 continue;
Benjamin Thery4e168802008-12-10 16:15:08 -0800164 return &init_net.ipv6.vif6_table[iter->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900165 }
166 return NULL;
167}
168
169static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
170 __releases(mrt_lock)
171{
172 read_unlock(&mrt_lock);
173}
174
175static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
176{
177 if (v == SEQ_START_TOKEN) {
178 seq_puts(seq,
179 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
180 } else {
181 const struct mif_device *vif = v;
182 const char *name = vif->dev ? vif->dev->name : "none";
183
184 seq_printf(seq,
Al Virod430a222008-06-02 10:59:02 +0100185 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
Benjamin Thery4e168802008-12-10 16:15:08 -0800186 vif - init_net.ipv6.vif6_table,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900187 name, vif->bytes_in, vif->pkt_in,
188 vif->bytes_out, vif->pkt_out,
189 vif->flags);
190 }
191 return 0;
192}
193
194static struct seq_operations ip6mr_vif_seq_ops = {
195 .start = ip6mr_vif_seq_start,
196 .next = ip6mr_vif_seq_next,
197 .stop = ip6mr_vif_seq_stop,
198 .show = ip6mr_vif_seq_show,
199};
200
201static int ip6mr_vif_open(struct inode *inode, struct file *file)
202{
203 return seq_open_private(file, &ip6mr_vif_seq_ops,
204 sizeof(struct ipmr_vif_iter));
205}
206
207static struct file_operations ip6mr_vif_fops = {
208 .owner = THIS_MODULE,
209 .open = ip6mr_vif_open,
210 .read = seq_read,
211 .llseek = seq_lseek,
Benjamin Theryeedd7262008-11-20 04:16:12 -0800212 .release = seq_release_private,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900213};
214
215static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
216{
217 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
218 : SEQ_START_TOKEN);
219}
220
221static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
222{
223 struct mfc6_cache *mfc = v;
224 struct ipmr_mfc_iter *it = seq->private;
225
226 ++*pos;
227
228 if (v == SEQ_START_TOKEN)
229 return ipmr_mfc_seq_idx(seq->private, 0);
230
231 if (mfc->next)
232 return mfc->next;
233
234 if (it->cache == &mfc_unres_queue)
235 goto end_of_list;
236
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800237 BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900238
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800239 while (++it->ct < MFC6_LINES) {
240 mfc = init_net.ipv6.mfc6_cache_array[it->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900241 if (mfc)
242 return mfc;
243 }
244
245 /* exhausted cache_array, show unresolved */
246 read_unlock(&mrt_lock);
247 it->cache = &mfc_unres_queue;
248 it->ct = 0;
249
250 spin_lock_bh(&mfc_unres_lock);
251 mfc = mfc_unres_queue;
252 if (mfc)
253 return mfc;
254
255 end_of_list:
256 spin_unlock_bh(&mfc_unres_lock);
257 it->cache = NULL;
258
259 return NULL;
260}
261
262static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
263{
264 struct ipmr_mfc_iter *it = seq->private;
265
266 if (it->cache == &mfc_unres_queue)
267 spin_unlock_bh(&mfc_unres_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800268 else if (it->cache == init_net.ipv6.mfc6_cache_array)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900269 read_unlock(&mrt_lock);
270}
271
272static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
273{
274 int n;
275
276 if (v == SEQ_START_TOKEN) {
277 seq_puts(seq,
278 "Group "
279 "Origin "
280 "Iif Pkts Bytes Wrong Oifs\n");
281 } else {
282 const struct mfc6_cache *mfc = v;
283 const struct ipmr_mfc_iter *it = seq->private;
284
Benjamin Thery999890b2008-12-03 22:22:16 -0800285 seq_printf(seq, "%pI6 %pI6 %-3hd",
Harvey Harrison0c6ce782008-10-28 16:09:23 -0700286 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800287 mfc->mf6c_parent);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900288
289 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800290 seq_printf(seq, " %8lu %8lu %8lu",
291 mfc->mfc_un.res.pkt,
292 mfc->mfc_un.res.bytes,
293 mfc->mfc_un.res.wrong_if);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900294 for (n = mfc->mfc_un.res.minvif;
295 n < mfc->mfc_un.res.maxvif; n++) {
Benjamin Thery4e168802008-12-10 16:15:08 -0800296 if (MIF_EXISTS(&init_net, n) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900297 mfc->mfc_un.res.ttls[n] < 255)
298 seq_printf(seq,
299 " %2d:%-3d",
300 n, mfc->mfc_un.res.ttls[n]);
301 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800302 } else {
303 /* unresolved mfc_caches don't contain
304 * pkt, bytes and wrong_if values
305 */
306 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900307 }
308 seq_putc(seq, '\n');
309 }
310 return 0;
311}
312
313static struct seq_operations ipmr_mfc_seq_ops = {
314 .start = ipmr_mfc_seq_start,
315 .next = ipmr_mfc_seq_next,
316 .stop = ipmr_mfc_seq_stop,
317 .show = ipmr_mfc_seq_show,
318};
319
320static int ipmr_mfc_open(struct inode *inode, struct file *file)
321{
322 return seq_open_private(file, &ipmr_mfc_seq_ops,
323 sizeof(struct ipmr_mfc_iter));
324}
325
326static struct file_operations ip6mr_mfc_fops = {
327 .owner = THIS_MODULE,
328 .open = ipmr_mfc_open,
329 .read = seq_read,
330 .llseek = seq_lseek,
Benjamin Theryeedd7262008-11-20 04:16:12 -0800331 .release = seq_release_private,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900332};
333#endif
334
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900335#ifdef CONFIG_IPV6_PIMSM_V2
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900336
337static int pim6_rcv(struct sk_buff *skb)
338{
339 struct pimreghdr *pim;
340 struct ipv6hdr *encap;
341 struct net_device *reg_dev = NULL;
Benjamin Thery950d5702008-12-10 16:29:24 -0800342 int reg_vif_num = init_net.ipv6.mroute_reg_vif_num;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900343
344 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
345 goto drop;
346
347 pim = (struct pimreghdr *)skb_transport_header(skb);
348 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
349 (pim->flags & PIM_NULL_REGISTER) ||
350 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Viroec6b4862008-04-26 22:28:58 -0700351 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900352 goto drop;
353
354 /* check if the inner packet is destined to mcast group */
355 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
356 sizeof(*pim));
357
358 if (!ipv6_addr_is_multicast(&encap->daddr) ||
359 encap->payload_len == 0 ||
360 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
361 goto drop;
362
363 read_lock(&mrt_lock);
364 if (reg_vif_num >= 0)
Benjamin Thery4e168802008-12-10 16:15:08 -0800365 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900366 if (reg_dev)
367 dev_hold(reg_dev);
368 read_unlock(&mrt_lock);
369
370 if (reg_dev == NULL)
371 goto drop;
372
373 skb->mac_header = skb->network_header;
374 skb_pull(skb, (u8 *)encap - skb->data);
375 skb_reset_network_header(skb);
376 skb->dev = reg_dev;
377 skb->protocol = htons(ETH_P_IP);
378 skb->ip_summed = 0;
379 skb->pkt_type = PACKET_HOST;
380 dst_release(skb->dst);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700381 reg_dev->stats.rx_bytes += skb->len;
382 reg_dev->stats.rx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900383 skb->dst = NULL;
384 nf_reset(skb);
385 netif_rx(skb);
386 dev_put(reg_dev);
387 return 0;
388 drop:
389 kfree_skb(skb);
390 return 0;
391}
392
393static struct inet6_protocol pim6_protocol = {
394 .handler = pim6_rcv,
395};
396
397/* Service routines creating virtual interfaces: PIMREG */
398
399static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
400{
401 read_lock(&mrt_lock);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700402 dev->stats.tx_bytes += skb->len;
403 dev->stats.tx_packets++;
Benjamin Thery950d5702008-12-10 16:29:24 -0800404 ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900405 read_unlock(&mrt_lock);
406 kfree_skb(skb);
407 return 0;
408}
409
Stephen Hemminger007c3832008-11-20 20:28:35 -0800410static const struct net_device_ops reg_vif_netdev_ops = {
411 .ndo_start_xmit = reg_vif_xmit,
412};
413
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900414static void reg_vif_setup(struct net_device *dev)
415{
416 dev->type = ARPHRD_PIMREG;
417 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
418 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800419 dev->netdev_ops = &reg_vif_netdev_ops;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900420 dev->destructor = free_netdev;
421}
422
423static struct net_device *ip6mr_reg_vif(void)
424{
425 struct net_device *dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900426
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700427 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900428 if (dev == NULL)
429 return NULL;
430
431 if (register_netdevice(dev)) {
432 free_netdev(dev);
433 return NULL;
434 }
435 dev->iflink = 0;
436
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900437 if (dev_open(dev))
438 goto failure;
439
Wang Chen7af3db72008-07-14 20:54:54 -0700440 dev_hold(dev);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900441 return dev;
442
443failure:
444 /* allow the register to be completed before unregistering. */
445 rtnl_unlock();
446 rtnl_lock();
447
448 unregister_netdevice(dev);
449 return NULL;
450}
451#endif
452
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900453/*
454 * Delete a VIF entry
455 */
456
457static int mif6_delete(int vifi)
458{
459 struct mif_device *v;
460 struct net_device *dev;
Benjamin Thery4e168802008-12-10 16:15:08 -0800461 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900462 return -EADDRNOTAVAIL;
463
Benjamin Thery4e168802008-12-10 16:15:08 -0800464 v = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900465
466 write_lock_bh(&mrt_lock);
467 dev = v->dev;
468 v->dev = NULL;
469
470 if (!dev) {
471 write_unlock_bh(&mrt_lock);
472 return -EADDRNOTAVAIL;
473 }
474
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900475#ifdef CONFIG_IPV6_PIMSM_V2
Benjamin Thery950d5702008-12-10 16:29:24 -0800476 if (vifi == init_net.ipv6.mroute_reg_vif_num)
477 init_net.ipv6.mroute_reg_vif_num = -1;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900478#endif
479
Benjamin Thery4e168802008-12-10 16:15:08 -0800480 if (vifi + 1 == init_net.ipv6.maxvif) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900481 int tmp;
482 for (tmp = vifi - 1; tmp >= 0; tmp--) {
Benjamin Thery4e168802008-12-10 16:15:08 -0800483 if (MIF_EXISTS(&init_net, tmp))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900484 break;
485 }
Benjamin Thery4e168802008-12-10 16:15:08 -0800486 init_net.ipv6.maxvif = tmp + 1;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900487 }
488
489 write_unlock_bh(&mrt_lock);
490
491 dev_set_allmulti(dev, -1);
492
493 if (v->flags & MIFF_REGISTER)
494 unregister_netdevice(dev);
495
496 dev_put(dev);
497 return 0;
498}
499
Benjamin Thery58701ad2008-12-10 16:22:34 -0800500static inline void ip6mr_cache_free(struct mfc6_cache *c)
501{
502 release_net(mfc6_net(c));
503 kmem_cache_free(mrt_cachep, c);
504}
505
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900506/* Destroy an unresolved cache entry, killing queued skbs
507 and reporting error to netlink readers.
508 */
509
510static void ip6mr_destroy_unres(struct mfc6_cache *c)
511{
512 struct sk_buff *skb;
513
Benjamin Thery4045e572008-12-10 16:27:21 -0800514 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900515
516 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
517 if (ipv6_hdr(skb)->version == 0) {
518 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
519 nlh->nlmsg_type = NLMSG_ERROR;
520 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
521 skb_trim(skb, nlh->nlmsg_len);
522 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
523 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
524 } else
525 kfree_skb(skb);
526 }
527
Benjamin Thery58701ad2008-12-10 16:22:34 -0800528 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900529}
530
531
532/* Single timer process for all the unresolved queue. */
533
534static void ipmr_do_expire_process(unsigned long dummy)
535{
536 unsigned long now = jiffies;
537 unsigned long expires = 10 * HZ;
538 struct mfc6_cache *c, **cp;
539
540 cp = &mfc_unres_queue;
541
542 while ((c = *cp) != NULL) {
543 if (time_after(c->mfc_un.unres.expires, now)) {
544 /* not yet... */
545 unsigned long interval = c->mfc_un.unres.expires - now;
546 if (interval < expires)
547 expires = interval;
548 cp = &c->next;
549 continue;
550 }
551
552 *cp = c->next;
553 ip6mr_destroy_unres(c);
554 }
555
Benjamin Thery4045e572008-12-10 16:27:21 -0800556 if (mfc_unres_queue != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900557 mod_timer(&ipmr_expire_timer, jiffies + expires);
558}
559
560static void ipmr_expire_process(unsigned long dummy)
561{
562 if (!spin_trylock(&mfc_unres_lock)) {
563 mod_timer(&ipmr_expire_timer, jiffies + 1);
564 return;
565 }
566
Benjamin Thery4045e572008-12-10 16:27:21 -0800567 if (mfc_unres_queue != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900568 ipmr_do_expire_process(dummy);
569
570 spin_unlock(&mfc_unres_lock);
571}
572
573/* Fill oifs list. It is called under write locked mrt_lock. */
574
575static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
576{
577 int vifi;
578
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300579 cache->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900580 cache->mfc_un.res.maxvif = 0;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300581 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900582
Benjamin Thery4e168802008-12-10 16:15:08 -0800583 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
584 if (MIF_EXISTS(&init_net, vifi) &&
585 ttls[vifi] && ttls[vifi] < 255) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900586 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
587 if (cache->mfc_un.res.minvif > vifi)
588 cache->mfc_un.res.minvif = vifi;
589 if (cache->mfc_un.res.maxvif <= vifi)
590 cache->mfc_un.res.maxvif = vifi + 1;
591 }
592 }
593}
594
595static int mif6_add(struct mif6ctl *vifc, int mrtsock)
596{
597 int vifi = vifc->mif6c_mifi;
Benjamin Thery4e168802008-12-10 16:15:08 -0800598 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900599 struct net_device *dev;
Wang Chen5ae7b442008-07-14 20:54:23 -0700600 int err;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900601
602 /* Is vif busy ? */
Benjamin Thery4e168802008-12-10 16:15:08 -0800603 if (MIF_EXISTS(&init_net, vifi))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900604 return -EADDRINUSE;
605
606 switch (vifc->mif6c_flags) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900607#ifdef CONFIG_IPV6_PIMSM_V2
608 case MIFF_REGISTER:
609 /*
610 * Special Purpose VIF in PIM
611 * All the packets will be sent to the daemon
612 */
Benjamin Thery950d5702008-12-10 16:29:24 -0800613 if (init_net.ipv6.mroute_reg_vif_num >= 0)
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900614 return -EADDRINUSE;
615 dev = ip6mr_reg_vif();
616 if (!dev)
617 return -ENOBUFS;
Wang Chen5ae7b442008-07-14 20:54:23 -0700618 err = dev_set_allmulti(dev, 1);
619 if (err) {
620 unregister_netdevice(dev);
Wang Chen7af3db72008-07-14 20:54:54 -0700621 dev_put(dev);
Wang Chen5ae7b442008-07-14 20:54:23 -0700622 return err;
623 }
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900624 break;
625#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900626 case 0:
627 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
628 if (!dev)
629 return -EADDRNOTAVAIL;
Wang Chen5ae7b442008-07-14 20:54:23 -0700630 err = dev_set_allmulti(dev, 1);
Wang Chen7af3db72008-07-14 20:54:54 -0700631 if (err) {
632 dev_put(dev);
Wang Chen5ae7b442008-07-14 20:54:23 -0700633 return err;
Wang Chen7af3db72008-07-14 20:54:54 -0700634 }
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900635 break;
636 default:
637 return -EINVAL;
638 }
639
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900640 /*
641 * Fill in the VIF structures
642 */
643 v->rate_limit = vifc->vifc_rate_limit;
644 v->flags = vifc->mif6c_flags;
645 if (!mrtsock)
646 v->flags |= VIFF_STATIC;
647 v->threshold = vifc->vifc_threshold;
648 v->bytes_in = 0;
649 v->bytes_out = 0;
650 v->pkt_in = 0;
651 v->pkt_out = 0;
652 v->link = dev->ifindex;
653 if (v->flags & MIFF_REGISTER)
654 v->link = dev->iflink;
655
656 /* And finish update writing critical data */
657 write_lock_bh(&mrt_lock);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900658 v->dev = dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900659#ifdef CONFIG_IPV6_PIMSM_V2
660 if (v->flags & MIFF_REGISTER)
Benjamin Thery950d5702008-12-10 16:29:24 -0800661 init_net.ipv6.mroute_reg_vif_num = vifi;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900662#endif
Benjamin Thery4e168802008-12-10 16:15:08 -0800663 if (vifi + 1 > init_net.ipv6.maxvif)
664 init_net.ipv6.maxvif = vifi + 1;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900665 write_unlock_bh(&mrt_lock);
666 return 0;
667}
668
669static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
670{
671 int line = MFC6_HASH(mcastgrp, origin);
672 struct mfc6_cache *c;
673
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800674 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900675 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
676 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
677 break;
678 }
679 return c;
680}
681
682/*
683 * Allocate a multicast cache entry
684 */
Benjamin Thery58701ad2008-12-10 16:22:34 -0800685static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900686{
Joe Perches36cbac52008-12-03 22:27:25 -0800687 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900688 if (c == NULL)
689 return NULL;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300690 c->mfc_un.res.minvif = MAXMIFS;
Benjamin Thery58701ad2008-12-10 16:22:34 -0800691 mfc6_net_set(c, net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900692 return c;
693}
694
Benjamin Thery58701ad2008-12-10 16:22:34 -0800695static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900696{
Joe Perches36cbac52008-12-03 22:27:25 -0800697 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900698 if (c == NULL)
699 return NULL;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900700 skb_queue_head_init(&c->mfc_un.unres.unresolved);
701 c->mfc_un.unres.expires = jiffies + 10 * HZ;
Benjamin Thery58701ad2008-12-10 16:22:34 -0800702 mfc6_net_set(c, net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900703 return c;
704}
705
706/*
707 * A cache entry has gone into a resolved state from queued
708 */
709
710static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
711{
712 struct sk_buff *skb;
713
714 /*
715 * Play the pending entries through our router
716 */
717
718 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
719 if (ipv6_hdr(skb)->version == 0) {
720 int err;
721 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
722
723 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +0900724 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900725 } else {
726 nlh->nlmsg_type = NLMSG_ERROR;
727 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
728 skb_trim(skb, nlh->nlmsg_len);
729 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
730 }
731 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
732 } else
733 ip6_mr_forward(skb, c);
734 }
735}
736
737/*
738 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
739 * expects the following bizarre scheme.
740 *
741 * Called under mrt_lock.
742 */
743
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300744static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900745{
746 struct sk_buff *skb;
747 struct mrt6msg *msg;
748 int ret;
749
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900750#ifdef CONFIG_IPV6_PIMSM_V2
751 if (assert == MRT6MSG_WHOLEPKT)
752 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
753 +sizeof(*msg));
754 else
755#endif
756 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900757
758 if (!skb)
759 return -ENOBUFS;
760
761 /* I suppose that internal messages
762 * do not require checksums */
763
764 skb->ip_summed = CHECKSUM_UNNECESSARY;
765
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900766#ifdef CONFIG_IPV6_PIMSM_V2
767 if (assert == MRT6MSG_WHOLEPKT) {
768 /* Ugly, but we have no choice with this interface.
769 Duplicate old header, fix length etc.
770 And all this only to mangle msg->im6_msgtype and
771 to set msg->im6_mbz to "mbz" :-)
772 */
773 skb_push(skb, -skb_network_offset(pkt));
774
775 skb_push(skb, sizeof(*msg));
776 skb_reset_transport_header(skb);
777 msg = (struct mrt6msg *)skb_transport_header(skb);
778 msg->im6_mbz = 0;
779 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
Benjamin Thery950d5702008-12-10 16:29:24 -0800780 msg->im6_mif = init_net.ipv6.mroute_reg_vif_num;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900781 msg->im6_pad = 0;
782 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
783 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
784
785 skb->ip_summed = CHECKSUM_UNNECESSARY;
786 } else
787#endif
788 {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900789 /*
790 * Copy the IP header
791 */
792
793 skb_put(skb, sizeof(struct ipv6hdr));
794 skb_reset_network_header(skb);
795 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
796
797 /*
798 * Add our header
799 */
800 skb_put(skb, sizeof(*msg));
801 skb_reset_transport_header(skb);
802 msg = (struct mrt6msg *)skb_transport_header(skb);
803
804 msg->im6_mbz = 0;
805 msg->im6_msgtype = assert;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300806 msg->im6_mif = mifi;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900807 msg->im6_pad = 0;
808 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
809 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
810
811 skb->dst = dst_clone(pkt->dst);
812 skb->ip_summed = CHECKSUM_UNNECESSARY;
813
814 skb_pull(skb, sizeof(struct ipv6hdr));
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900815 }
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900816
Benjamin Therybd91b8b2008-12-10 16:07:08 -0800817 if (init_net.ipv6.mroute6_sk == NULL) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900818 kfree_skb(skb);
819 return -EINVAL;
820 }
821
822 /*
823 * Deliver to user space multicast routing algorithms
824 */
Benjamin Therybd91b8b2008-12-10 16:07:08 -0800825 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
826 if (ret < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900827 if (net_ratelimit())
828 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
829 kfree_skb(skb);
830 }
831
832 return ret;
833}
834
835/*
836 * Queue a packet for resolution. It gets locked cache entry!
837 */
838
839static int
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300840ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900841{
842 int err;
843 struct mfc6_cache *c;
844
845 spin_lock_bh(&mfc_unres_lock);
846 for (c = mfc_unres_queue; c; c = c->next) {
Benjamin Thery4045e572008-12-10 16:27:21 -0800847 if (net_eq(mfc6_net(c), &init_net) &&
848 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900849 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
850 break;
851 }
852
853 if (c == NULL) {
854 /*
855 * Create a new entry if allowable
856 */
857
Benjamin Thery4045e572008-12-10 16:27:21 -0800858 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
Benjamin Thery58701ad2008-12-10 16:22:34 -0800859 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900860 spin_unlock_bh(&mfc_unres_lock);
861
862 kfree_skb(skb);
863 return -ENOBUFS;
864 }
865
866 /*
867 * Fill in the new cache entry
868 */
869 c->mf6c_parent = -1;
870 c->mf6c_origin = ipv6_hdr(skb)->saddr;
871 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
872
873 /*
874 * Reflect first query at pim6sd
875 */
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300876 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900877 /* If the report failed throw the cache entry
878 out - Brad Parker
879 */
880 spin_unlock_bh(&mfc_unres_lock);
881
Benjamin Thery58701ad2008-12-10 16:22:34 -0800882 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900883 kfree_skb(skb);
884 return err;
885 }
886
Benjamin Thery4045e572008-12-10 16:27:21 -0800887 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900888 c->next = mfc_unres_queue;
889 mfc_unres_queue = c;
890
891 ipmr_do_expire_process(1);
892 }
893
894 /*
895 * See if we can append the packet
896 */
897 if (c->mfc_un.unres.unresolved.qlen > 3) {
898 kfree_skb(skb);
899 err = -ENOBUFS;
900 } else {
901 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
902 err = 0;
903 }
904
905 spin_unlock_bh(&mfc_unres_lock);
906 return err;
907}
908
909/*
910 * MFC6 cache manipulation by user space
911 */
912
913static int ip6mr_mfc_delete(struct mf6cctl *mfc)
914{
915 int line;
916 struct mfc6_cache *c, **cp;
917
918 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
919
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800920 for (cp = &init_net.ipv6.mfc6_cache_array[line];
921 (c = *cp) != NULL; cp = &c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900922 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
923 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
924 write_lock_bh(&mrt_lock);
925 *cp = c->next;
926 write_unlock_bh(&mrt_lock);
927
Benjamin Thery58701ad2008-12-10 16:22:34 -0800928 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900929 return 0;
930 }
931 }
932 return -ENOENT;
933}
934
935static int ip6mr_device_event(struct notifier_block *this,
936 unsigned long event, void *ptr)
937{
938 struct net_device *dev = ptr;
939 struct mif_device *v;
940 int ct;
941
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -0700942 if (!net_eq(dev_net(dev), &init_net))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900943 return NOTIFY_DONE;
944
945 if (event != NETDEV_UNREGISTER)
946 return NOTIFY_DONE;
947
Benjamin Thery4e168802008-12-10 16:15:08 -0800948 v = &init_net.ipv6.vif6_table[0];
949 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900950 if (v->dev == dev)
951 mif6_delete(ct);
952 }
953 return NOTIFY_DONE;
954}
955
956static struct notifier_block ip6_mr_notifier = {
957 .notifier_call = ip6mr_device_event
958};
959
960/*
961 * Setup for IP multicast routing
962 */
963
Benjamin Thery4e168802008-12-10 16:15:08 -0800964static int __net_init ip6mr_net_init(struct net *net)
965{
966 int err = 0;
Benjamin Thery4e168802008-12-10 16:15:08 -0800967 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
968 GFP_KERNEL);
969 if (!net->ipv6.vif6_table) {
970 err = -ENOMEM;
971 goto fail;
972 }
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800973
974 /* Forwarding cache */
975 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
976 sizeof(struct mfc6_cache *),
977 GFP_KERNEL);
978 if (!net->ipv6.mfc6_cache_array) {
979 err = -ENOMEM;
980 goto fail_mfc6_cache;
981 }
Benjamin Thery950d5702008-12-10 16:29:24 -0800982
983#ifdef CONFIG_IPV6_PIMSM_V2
984 net->ipv6.mroute_reg_vif_num = -1;
985#endif
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800986 return 0;
987
988fail_mfc6_cache:
989 kfree(net->ipv6.vif6_table);
Benjamin Thery4e168802008-12-10 16:15:08 -0800990fail:
991 return err;
992}
993
994static void __net_exit ip6mr_net_exit(struct net *net)
995{
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800996 kfree(net->ipv6.mfc6_cache_array);
Benjamin Thery4e168802008-12-10 16:15:08 -0800997 kfree(net->ipv6.vif6_table);
998}
999
1000static struct pernet_operations ip6mr_net_ops = {
1001 .init = ip6mr_net_init,
1002 .exit = ip6mr_net_exit,
1003};
1004
Wang Chen623d1a12008-07-03 12:13:30 +08001005int __init ip6_mr_init(void)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001006{
Wang Chen623d1a12008-07-03 12:13:30 +08001007 int err;
1008
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001009 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1010 sizeof(struct mfc6_cache),
1011 0, SLAB_HWCACHE_ALIGN,
1012 NULL);
1013 if (!mrt_cachep)
Wang Chen623d1a12008-07-03 12:13:30 +08001014 return -ENOMEM;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001015
Benjamin Thery4e168802008-12-10 16:15:08 -08001016 err = register_pernet_subsys(&ip6mr_net_ops);
1017 if (err)
1018 goto reg_pernet_fail;
1019
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001020 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen623d1a12008-07-03 12:13:30 +08001021 err = register_netdevice_notifier(&ip6_mr_notifier);
1022 if (err)
1023 goto reg_notif_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001024#ifdef CONFIG_PROC_FS
Wang Chen623d1a12008-07-03 12:13:30 +08001025 err = -ENOMEM;
1026 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1027 goto proc_vif_fail;
1028 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1029 0, &ip6mr_mfc_fops))
1030 goto proc_cache_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001031#endif
Wang Chen623d1a12008-07-03 12:13:30 +08001032 return 0;
Wang Chen623d1a12008-07-03 12:13:30 +08001033#ifdef CONFIG_PROC_FS
Wang Chen623d1a12008-07-03 12:13:30 +08001034proc_cache_fail:
1035 proc_net_remove(&init_net, "ip6_mr_vif");
Benjamin Thery87b30a62008-11-10 16:34:11 -08001036proc_vif_fail:
1037 unregister_netdevice_notifier(&ip6_mr_notifier);
Wang Chen623d1a12008-07-03 12:13:30 +08001038#endif
Benjamin Thery87b30a62008-11-10 16:34:11 -08001039reg_notif_fail:
1040 del_timer(&ipmr_expire_timer);
Benjamin Thery4e168802008-12-10 16:15:08 -08001041 unregister_pernet_subsys(&ip6mr_net_ops);
1042reg_pernet_fail:
Benjamin Thery87b30a62008-11-10 16:34:11 -08001043 kmem_cache_destroy(mrt_cachep);
Wang Chen623d1a12008-07-03 12:13:30 +08001044 return err;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001045}
1046
Wang Chen623d1a12008-07-03 12:13:30 +08001047void ip6_mr_cleanup(void)
1048{
1049#ifdef CONFIG_PROC_FS
1050 proc_net_remove(&init_net, "ip6_mr_cache");
1051 proc_net_remove(&init_net, "ip6_mr_vif");
1052#endif
1053 unregister_netdevice_notifier(&ip6_mr_notifier);
1054 del_timer(&ipmr_expire_timer);
Benjamin Thery4e168802008-12-10 16:15:08 -08001055 unregister_pernet_subsys(&ip6mr_net_ops);
Wang Chen623d1a12008-07-03 12:13:30 +08001056 kmem_cache_destroy(mrt_cachep);
1057}
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001058
1059static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1060{
1061 int line;
1062 struct mfc6_cache *uc, *c, **cp;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001063 unsigned char ttls[MAXMIFS];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001064 int i;
1065
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001066 memset(ttls, 255, MAXMIFS);
1067 for (i = 0; i < MAXMIFS; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001068 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1069 ttls[i] = 1;
1070
1071 }
1072
1073 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1074
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001075 for (cp = &init_net.ipv6.mfc6_cache_array[line];
1076 (c = *cp) != NULL; cp = &c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001077 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1078 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1079 break;
1080 }
1081
1082 if (c != NULL) {
1083 write_lock_bh(&mrt_lock);
1084 c->mf6c_parent = mfc->mf6cc_parent;
1085 ip6mr_update_thresholds(c, ttls);
1086 if (!mrtsock)
1087 c->mfc_flags |= MFC_STATIC;
1088 write_unlock_bh(&mrt_lock);
1089 return 0;
1090 }
1091
1092 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1093 return -EINVAL;
1094
Benjamin Thery58701ad2008-12-10 16:22:34 -08001095 c = ip6mr_cache_alloc(&init_net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001096 if (c == NULL)
1097 return -ENOMEM;
1098
1099 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1100 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1101 c->mf6c_parent = mfc->mf6cc_parent;
1102 ip6mr_update_thresholds(c, ttls);
1103 if (!mrtsock)
1104 c->mfc_flags |= MFC_STATIC;
1105
1106 write_lock_bh(&mrt_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001107 c->next = init_net.ipv6.mfc6_cache_array[line];
1108 init_net.ipv6.mfc6_cache_array[line] = c;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001109 write_unlock_bh(&mrt_lock);
1110
1111 /*
1112 * Check to see if we resolved a queued list. If so we
1113 * need to send on the frames and tidy up.
1114 */
1115 spin_lock_bh(&mfc_unres_lock);
1116 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1117 cp = &uc->next) {
Benjamin Thery4045e572008-12-10 16:27:21 -08001118 if (net_eq(mfc6_net(uc), &init_net) &&
1119 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001120 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1121 *cp = uc->next;
Benjamin Thery4045e572008-12-10 16:27:21 -08001122 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001123 break;
1124 }
1125 }
Benjamin Thery4045e572008-12-10 16:27:21 -08001126 if (mfc_unres_queue == NULL)
1127 del_timer(&ipmr_expire_timer);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001128 spin_unlock_bh(&mfc_unres_lock);
1129
1130 if (uc) {
1131 ip6mr_cache_resolve(uc, c);
Benjamin Thery58701ad2008-12-10 16:22:34 -08001132 ip6mr_cache_free(uc);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001133 }
1134 return 0;
1135}
1136
1137/*
1138 * Close the multicast socket, and clear the vif tables etc
1139 */
1140
1141static void mroute_clean_tables(struct sock *sk)
1142{
1143 int i;
1144
1145 /*
1146 * Shut down all active vif entries
1147 */
Benjamin Thery4e168802008-12-10 16:15:08 -08001148 for (i = 0; i < init_net.ipv6.maxvif; i++) {
1149 if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001150 mif6_delete(i);
1151 }
1152
1153 /*
1154 * Wipe the cache
1155 */
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001156 for (i = 0; i < MFC6_LINES; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001157 struct mfc6_cache *c, **cp;
1158
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001159 cp = &init_net.ipv6.mfc6_cache_array[i];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001160 while ((c = *cp) != NULL) {
1161 if (c->mfc_flags & MFC_STATIC) {
1162 cp = &c->next;
1163 continue;
1164 }
1165 write_lock_bh(&mrt_lock);
1166 *cp = c->next;
1167 write_unlock_bh(&mrt_lock);
1168
Benjamin Thery58701ad2008-12-10 16:22:34 -08001169 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001170 }
1171 }
1172
Benjamin Thery4045e572008-12-10 16:27:21 -08001173 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
1174 struct mfc6_cache *c, **cp;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001175
1176 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery4045e572008-12-10 16:27:21 -08001177 cp = &mfc_unres_queue;
1178 while ((c = *cp) != NULL) {
1179 if (!net_eq(mfc6_net(c), &init_net)) {
1180 cp = &c->next;
1181 continue;
1182 }
1183 *cp = c->next;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001184 ip6mr_destroy_unres(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001185 }
1186 spin_unlock_bh(&mfc_unres_lock);
1187 }
1188}
1189
1190static int ip6mr_sk_init(struct sock *sk)
1191{
1192 int err = 0;
1193
1194 rtnl_lock();
1195 write_lock_bh(&mrt_lock);
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001196 if (likely(init_net.ipv6.mroute6_sk == NULL))
1197 init_net.ipv6.mroute6_sk = sk;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001198 else
1199 err = -EADDRINUSE;
1200 write_unlock_bh(&mrt_lock);
1201
1202 rtnl_unlock();
1203
1204 return err;
1205}
1206
1207int ip6mr_sk_done(struct sock *sk)
1208{
1209 int err = 0;
1210
1211 rtnl_lock();
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001212 if (sk == init_net.ipv6.mroute6_sk) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001213 write_lock_bh(&mrt_lock);
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001214 init_net.ipv6.mroute6_sk = NULL;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001215 write_unlock_bh(&mrt_lock);
1216
1217 mroute_clean_tables(sk);
1218 } else
1219 err = -EACCES;
1220 rtnl_unlock();
1221
1222 return err;
1223}
1224
1225/*
1226 * Socket options and virtual interface manipulation. The whole
1227 * virtual interface system is a complete heap, but unfortunately
1228 * that's how BSD mrouted happens to think. Maybe one day with a proper
1229 * MOSPF/PIM router set up we can clean this up.
1230 */
1231
1232int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1233{
1234 int ret;
1235 struct mif6ctl vif;
1236 struct mf6cctl mfc;
1237 mifi_t mifi;
1238
1239 if (optname != MRT6_INIT) {
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001240 if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001241 return -EACCES;
1242 }
1243
1244 switch (optname) {
1245 case MRT6_INIT:
1246 if (sk->sk_type != SOCK_RAW ||
1247 inet_sk(sk)->num != IPPROTO_ICMPV6)
1248 return -EOPNOTSUPP;
1249 if (optlen < sizeof(int))
1250 return -EINVAL;
1251
1252 return ip6mr_sk_init(sk);
1253
1254 case MRT6_DONE:
1255 return ip6mr_sk_done(sk);
1256
1257 case MRT6_ADD_MIF:
1258 if (optlen < sizeof(vif))
1259 return -EINVAL;
1260 if (copy_from_user(&vif, optval, sizeof(vif)))
1261 return -EFAULT;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001262 if (vif.mif6c_mifi >= MAXMIFS)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001263 return -ENFILE;
1264 rtnl_lock();
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001265 ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001266 rtnl_unlock();
1267 return ret;
1268
1269 case MRT6_DEL_MIF:
1270 if (optlen < sizeof(mifi_t))
1271 return -EINVAL;
1272 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1273 return -EFAULT;
1274 rtnl_lock();
1275 ret = mif6_delete(mifi);
1276 rtnl_unlock();
1277 return ret;
1278
1279 /*
1280 * Manipulate the forwarding caches. These live
1281 * in a sort of kernel/user symbiosis.
1282 */
1283 case MRT6_ADD_MFC:
1284 case MRT6_DEL_MFC:
1285 if (optlen < sizeof(mfc))
1286 return -EINVAL;
1287 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1288 return -EFAULT;
1289 rtnl_lock();
1290 if (optname == MRT6_DEL_MFC)
1291 ret = ip6mr_mfc_delete(&mfc);
1292 else
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001293 ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001294 rtnl_unlock();
1295 return ret;
1296
1297 /*
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001298 * Control PIM assert (to activate pim will activate assert)
1299 */
1300 case MRT6_ASSERT:
1301 {
1302 int v;
1303 if (get_user(v, (int __user *)optval))
1304 return -EFAULT;
Benjamin Therya21f3f92008-12-10 16:28:44 -08001305 init_net.ipv6.mroute_do_assert = !!v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001306 return 0;
1307 }
1308
1309#ifdef CONFIG_IPV6_PIMSM_V2
1310 case MRT6_PIM:
1311 {
YOSHIFUJI Hideakia9f83bf2008-04-10 15:41:28 +09001312 int v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001313 if (get_user(v, (int __user *)optval))
1314 return -EFAULT;
1315 v = !!v;
1316 rtnl_lock();
1317 ret = 0;
Benjamin Therya21f3f92008-12-10 16:28:44 -08001318 if (v != init_net.ipv6.mroute_do_pim) {
1319 init_net.ipv6.mroute_do_pim = v;
1320 init_net.ipv6.mroute_do_assert = v;
1321 if (init_net.ipv6.mroute_do_pim)
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001322 ret = inet6_add_protocol(&pim6_protocol,
1323 IPPROTO_PIM);
1324 else
1325 ret = inet6_del_protocol(&pim6_protocol,
1326 IPPROTO_PIM);
1327 if (ret < 0)
1328 ret = -EAGAIN;
1329 }
1330 rtnl_unlock();
1331 return ret;
1332 }
1333
1334#endif
1335 /*
Rami Rosen7d120c52008-04-23 14:35:13 +03001336 * Spurious command, or MRT6_VERSION which you cannot
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001337 * set.
1338 */
1339 default:
1340 return -ENOPROTOOPT;
1341 }
1342}
1343
1344/*
1345 * Getsock opt support for the multicast routing system.
1346 */
1347
1348int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1349 int __user *optlen)
1350{
1351 int olr;
1352 int val;
1353
1354 switch (optname) {
1355 case MRT6_VERSION:
1356 val = 0x0305;
1357 break;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001358#ifdef CONFIG_IPV6_PIMSM_V2
1359 case MRT6_PIM:
Benjamin Therya21f3f92008-12-10 16:28:44 -08001360 val = init_net.ipv6.mroute_do_pim;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001361 break;
1362#endif
1363 case MRT6_ASSERT:
Benjamin Therya21f3f92008-12-10 16:28:44 -08001364 val = init_net.ipv6.mroute_do_assert;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001365 break;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001366 default:
1367 return -ENOPROTOOPT;
1368 }
1369
1370 if (get_user(olr, optlen))
1371 return -EFAULT;
1372
1373 olr = min_t(int, olr, sizeof(int));
1374 if (olr < 0)
1375 return -EINVAL;
1376
1377 if (put_user(olr, optlen))
1378 return -EFAULT;
1379 if (copy_to_user(optval, &val, olr))
1380 return -EFAULT;
1381 return 0;
1382}
1383
1384/*
1385 * The IP multicast ioctl support routines.
1386 */
1387
1388int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1389{
1390 struct sioc_sg_req6 sr;
1391 struct sioc_mif_req6 vr;
1392 struct mif_device *vif;
1393 struct mfc6_cache *c;
1394
1395 switch (cmd) {
1396 case SIOCGETMIFCNT_IN6:
1397 if (copy_from_user(&vr, arg, sizeof(vr)))
1398 return -EFAULT;
Benjamin Thery4e168802008-12-10 16:15:08 -08001399 if (vr.mifi >= init_net.ipv6.maxvif)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001400 return -EINVAL;
1401 read_lock(&mrt_lock);
Benjamin Thery4e168802008-12-10 16:15:08 -08001402 vif = &init_net.ipv6.vif6_table[vr.mifi];
1403 if (MIF_EXISTS(&init_net, vr.mifi)) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001404 vr.icount = vif->pkt_in;
1405 vr.ocount = vif->pkt_out;
1406 vr.ibytes = vif->bytes_in;
1407 vr.obytes = vif->bytes_out;
1408 read_unlock(&mrt_lock);
1409
1410 if (copy_to_user(arg, &vr, sizeof(vr)))
1411 return -EFAULT;
1412 return 0;
1413 }
1414 read_unlock(&mrt_lock);
1415 return -EADDRNOTAVAIL;
1416 case SIOCGETSGCNT_IN6:
1417 if (copy_from_user(&sr, arg, sizeof(sr)))
1418 return -EFAULT;
1419
1420 read_lock(&mrt_lock);
1421 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1422 if (c) {
1423 sr.pktcnt = c->mfc_un.res.pkt;
1424 sr.bytecnt = c->mfc_un.res.bytes;
1425 sr.wrong_if = c->mfc_un.res.wrong_if;
1426 read_unlock(&mrt_lock);
1427
1428 if (copy_to_user(arg, &sr, sizeof(sr)))
1429 return -EFAULT;
1430 return 0;
1431 }
1432 read_unlock(&mrt_lock);
1433 return -EADDRNOTAVAIL;
1434 default:
1435 return -ENOIOCTLCMD;
1436 }
1437}
1438
1439
1440static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1441{
Denis V. Lunev483a47d2008-10-08 11:09:27 -07001442 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1443 IPSTATS_MIB_OUTFORWDATAGRAMS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001444 return dst_output(skb);
1445}
1446
1447/*
1448 * Processing handlers for ip6mr_forward
1449 */
1450
1451static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1452{
1453 struct ipv6hdr *ipv6h;
Benjamin Thery4e168802008-12-10 16:15:08 -08001454 struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001455 struct net_device *dev;
1456 struct dst_entry *dst;
1457 struct flowi fl;
1458
1459 if (vif->dev == NULL)
1460 goto out_free;
1461
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001462#ifdef CONFIG_IPV6_PIMSM_V2
1463 if (vif->flags & MIFF_REGISTER) {
1464 vif->pkt_out++;
1465 vif->bytes_out += skb->len;
Pavel Emelyanovdc58c782008-05-21 14:17:54 -07001466 vif->dev->stats.tx_bytes += skb->len;
1467 vif->dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001468 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1469 kfree_skb(skb);
1470 return 0;
1471 }
1472#endif
1473
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001474 ipv6h = ipv6_hdr(skb);
1475
1476 fl = (struct flowi) {
1477 .oif = vif->link,
1478 .nl_u = { .ip6_u =
1479 { .daddr = ipv6h->daddr, }
1480 }
1481 };
1482
1483 dst = ip6_route_output(&init_net, NULL, &fl);
1484 if (!dst)
1485 goto out_free;
1486
1487 dst_release(skb->dst);
1488 skb->dst = dst;
1489
1490 /*
1491 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1492 * not only before forwarding, but after forwarding on all output
1493 * interfaces. It is clear, if mrouter runs a multicasting
1494 * program, it should receive packets not depending to what interface
1495 * program is joined.
1496 * If we will not make it, the program will have to join on all
1497 * interfaces. On the other hand, multihoming host (or router, but
1498 * not mrouter) cannot join to more than one interface - it will
1499 * result in receiving multiple packets.
1500 */
1501 dev = vif->dev;
1502 skb->dev = dev;
1503 vif->pkt_out++;
1504 vif->bytes_out += skb->len;
1505
1506 /* We are about to write */
1507 /* XXX: extension headers? */
1508 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1509 goto out_free;
1510
1511 ipv6h = ipv6_hdr(skb);
1512 ipv6h->hop_limit--;
1513
1514 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1515
1516 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1517 ip6mr_forward2_finish);
1518
1519out_free:
1520 kfree_skb(skb);
1521 return 0;
1522}
1523
1524static int ip6mr_find_vif(struct net_device *dev)
1525{
1526 int ct;
Benjamin Thery4e168802008-12-10 16:15:08 -08001527 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1528 if (init_net.ipv6.vif6_table[ct].dev == dev)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001529 break;
1530 }
1531 return ct;
1532}
1533
1534static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1535{
1536 int psend = -1;
1537 int vif, ct;
1538
1539 vif = cache->mf6c_parent;
1540 cache->mfc_un.res.pkt++;
1541 cache->mfc_un.res.bytes += skb->len;
1542
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001543 /*
1544 * Wrong interface: drop packet and (maybe) send PIM assert.
1545 */
Benjamin Thery4e168802008-12-10 16:15:08 -08001546 if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001547 int true_vifi;
1548
1549 cache->mfc_un.res.wrong_if++;
1550 true_vifi = ip6mr_find_vif(skb->dev);
1551
Benjamin Therya21f3f92008-12-10 16:28:44 -08001552 if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001553 /* pimsm uses asserts, when switching from RPT to SPT,
1554 so that we cannot check that packet arrived on an oif.
1555 It is bad, but otherwise we would need to move pretty
1556 large chunk of pimd to kernel. Ough... --ANK
1557 */
Benjamin Therya21f3f92008-12-10 16:28:44 -08001558 (init_net.ipv6.mroute_do_pim ||
1559 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001560 time_after(jiffies,
1561 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1562 cache->mfc_un.res.last_assert = jiffies;
1563 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1564 }
1565 goto dont_forward;
1566 }
1567
Benjamin Thery4e168802008-12-10 16:15:08 -08001568 init_net.ipv6.vif6_table[vif].pkt_in++;
1569 init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001570
1571 /*
1572 * Forward the frame
1573 */
1574 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1575 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1576 if (psend != -1) {
1577 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1578 if (skb2)
1579 ip6mr_forward2(skb2, cache, psend);
1580 }
1581 psend = ct;
1582 }
1583 }
1584 if (psend != -1) {
1585 ip6mr_forward2(skb, cache, psend);
1586 return 0;
1587 }
1588
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001589dont_forward:
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001590 kfree_skb(skb);
1591 return 0;
1592}
1593
1594
1595/*
1596 * Multicast packets for forwarding arrive here
1597 */
1598
1599int ip6_mr_input(struct sk_buff *skb)
1600{
1601 struct mfc6_cache *cache;
1602
1603 read_lock(&mrt_lock);
1604 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1605
1606 /*
1607 * No usable cache entry
1608 */
1609 if (cache == NULL) {
1610 int vif;
1611
1612 vif = ip6mr_find_vif(skb->dev);
1613 if (vif >= 0) {
1614 int err = ip6mr_cache_unresolved(vif, skb);
1615 read_unlock(&mrt_lock);
1616
1617 return err;
1618 }
1619 read_unlock(&mrt_lock);
1620 kfree_skb(skb);
1621 return -ENODEV;
1622 }
1623
1624 ip6_mr_forward(skb, cache);
1625
1626 read_unlock(&mrt_lock);
1627
1628 return 0;
1629}
1630
1631
1632static int
1633ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1634{
1635 int ct;
1636 struct rtnexthop *nhp;
Benjamin Thery4e168802008-12-10 16:15:08 -08001637 struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001638 u8 *b = skb_tail_pointer(skb);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001639 struct rtattr *mp_head;
1640
1641 if (dev)
1642 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1643
1644 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1645
1646 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1647 if (c->mfc_un.res.ttls[ct] < 255) {
1648 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1649 goto rtattr_failure;
1650 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1651 nhp->rtnh_flags = 0;
1652 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Thery4e168802008-12-10 16:15:08 -08001653 nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001654 nhp->rtnh_len = sizeof(*nhp);
1655 }
1656 }
1657 mp_head->rta_type = RTA_MULTIPATH;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001658 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001659 rtm->rtm_type = RTN_MULTICAST;
1660 return 1;
1661
1662rtattr_failure:
1663 nlmsg_trim(skb, b);
1664 return -EMSGSIZE;
1665}
1666
1667int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1668{
1669 int err;
1670 struct mfc6_cache *cache;
1671 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1672
1673 read_lock(&mrt_lock);
1674 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1675
1676 if (!cache) {
1677 struct sk_buff *skb2;
1678 struct ipv6hdr *iph;
1679 struct net_device *dev;
1680 int vif;
1681
1682 if (nowait) {
1683 read_unlock(&mrt_lock);
1684 return -EAGAIN;
1685 }
1686
1687 dev = skb->dev;
1688 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1689 read_unlock(&mrt_lock);
1690 return -ENODEV;
1691 }
1692
1693 /* really correct? */
1694 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1695 if (!skb2) {
1696 read_unlock(&mrt_lock);
1697 return -ENOMEM;
1698 }
1699
1700 skb_reset_transport_header(skb2);
1701
1702 skb_put(skb2, sizeof(struct ipv6hdr));
1703 skb_reset_network_header(skb2);
1704
1705 iph = ipv6_hdr(skb2);
1706 iph->version = 0;
1707 iph->priority = 0;
1708 iph->flow_lbl[0] = 0;
1709 iph->flow_lbl[1] = 0;
1710 iph->flow_lbl[2] = 0;
1711 iph->payload_len = 0;
1712 iph->nexthdr = IPPROTO_NONE;
1713 iph->hop_limit = 0;
1714 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1715 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1716
1717 err = ip6mr_cache_unresolved(vif, skb2);
1718 read_unlock(&mrt_lock);
1719
1720 return err;
1721 }
1722
1723 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1724 cache->mfc_flags |= MFC_NOTIFY;
1725
1726 err = ip6mr_fill_mroute(skb, cache, rtm);
1727 read_unlock(&mrt_lock);
1728 return err;
1729}
1730