blob: 6a4019a4ca89d0f875062d1b36c282be9d4feb3c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() become real printk() calls; below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already have a nexthop (or are flagged RTF_NONEXTHOP) into host cache
 * entries; when zero, such routes are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77static int ip6_rt_max_size = 4096;
78static int ip6_rt_gc_min_interval = HZ / 2;
79static int ip6_rt_gc_timeout = 60*HZ;
80int ip6_rt_gc_interval = 30*HZ;
81static int ip6_rt_gc_elasticity = 9;
82static int ip6_rt_mtu_expires = 10*60*HZ;
83static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
84
85static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
86static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
87static struct dst_entry *ip6_negative_advice(struct dst_entry *);
88static void ip6_dst_destroy(struct dst_entry *);
89static void ip6_dst_ifdown(struct dst_entry *,
90 struct net_device *dev, int how);
91static int ip6_dst_gc(void);
92
93static int ip6_pkt_discard(struct sk_buff *skb);
94static int ip6_pkt_discard_out(struct sk_buff *skb);
95static void ip6_link_failure(struct sk_buff *skb);
96static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
97
98static struct dst_ops ip6_dst_ops = {
99 .family = AF_INET6,
100 .protocol = __constant_htons(ETH_P_IPV6),
101 .gc = ip6_dst_gc,
102 .gc_thresh = 1024,
103 .check = ip6_dst_check,
104 .destroy = ip6_dst_destroy,
105 .ifdown = ip6_dst_ifdown,
106 .negative_advice = ip6_negative_advice,
107 .link_failure = ip6_link_failure,
108 .update_pmtu = ip6_rt_update_pmtu,
109 .entry_size = sizeof(struct rt6_info),
110};
111
112struct rt6_info ip6_null_entry = {
113 .u = {
114 .dst = {
115 .__refcnt = ATOMIC_INIT(1),
116 .__use = 1,
117 .dev = &loopback_dev,
118 .obsolete = -1,
119 .error = -ENETUNREACH,
120 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
121 .input = ip6_pkt_discard,
122 .output = ip6_pkt_discard_out,
123 .ops = &ip6_dst_ops,
124 .path = (struct dst_entry*)&ip6_null_entry,
125 }
126 },
127 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
128 .rt6i_metric = ~(u32) 0,
129 .rt6i_ref = ATOMIC_INIT(1),
130};
131
132struct fib6_node ip6_routing_table = {
133 .leaf = &ip6_null_entry,
134 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
135};
136
137/* Protects all the ip6 fib */
138
139DEFINE_RWLOCK(rt6_lock);
140
141
142/* allocate dst with ip6_dst_ops */
143static __inline__ struct rt6_info *ip6_dst_alloc(void)
144{
145 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
146}
147
148static void ip6_dst_destroy(struct dst_entry *dst)
149{
150 struct rt6_info *rt = (struct rt6_info *)dst;
151 struct inet6_dev *idev = rt->rt6i_idev;
152
153 if (idev != NULL) {
154 rt->rt6i_idev = NULL;
155 in6_dev_put(idev);
156 }
157}
158
159static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
160 int how)
161{
162 struct rt6_info *rt = (struct rt6_info *)dst;
163 struct inet6_dev *idev = rt->rt6i_idev;
164
165 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
166 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
167 if (loopback_idev != NULL) {
168 rt->rt6i_idev = loopback_idev;
169 in6_dev_put(idev);
170 }
171 }
172}
173
174static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175{
176 return (rt->rt6i_flags & RTF_EXPIRES &&
177 time_after(jiffies, rt->rt6i_expires));
178}
179
180/*
181 * Route lookup. Any rt6_lock is implied.
182 */
183
184static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
185 int oif,
186 int strict)
187{
188 struct rt6_info *local = NULL;
189 struct rt6_info *sprt;
190
191 if (oif) {
192 for (sprt = rt; sprt; sprt = sprt->u.next) {
193 struct net_device *dev = sprt->rt6i_dev;
194 if (dev->ifindex == oif)
195 return sprt;
196 if (dev->flags & IFF_LOOPBACK) {
197 if (sprt->rt6i_idev == NULL ||
198 sprt->rt6i_idev->dev->ifindex != oif) {
199 if (strict && oif)
200 continue;
201 if (local && (!oif ||
202 local->rt6i_idev->dev->ifindex == oif))
203 continue;
204 }
205 local = sprt;
206 }
207 }
208
209 if (local)
210 return local;
211
212 if (strict)
213 return &ip6_null_entry;
214 }
215 return rt;
216}
217
218/*
219 * pointer to the last default router chosen. BH is disabled locally.
220 */
221static struct rt6_info *rt6_dflt_pointer;
222static DEFINE_SPINLOCK(rt6_dflt_lock);
223
224void rt6_reset_dflt_pointer(struct rt6_info *rt)
225{
226 spin_lock_bh(&rt6_dflt_lock);
227 if (rt == NULL || rt == rt6_dflt_pointer) {
228 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
229 rt6_dflt_pointer = NULL;
230 }
231 spin_unlock_bh(&rt6_dflt_lock);
232}
233
234/* Default Router Selection (RFC 2461 6.3.6) */
235static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236{
237 struct rt6_info *match = NULL;
238 struct rt6_info *sprt;
239 int mpri = 0;
240
241 for (sprt = rt; sprt; sprt = sprt->u.next) {
242 struct neighbour *neigh;
243 int m = 0;
244
245 if (!oif ||
246 (sprt->rt6i_dev &&
247 sprt->rt6i_dev->ifindex == oif))
248 m += 8;
249
250 if (rt6_check_expired(sprt))
251 continue;
252
253 if (sprt == rt6_dflt_pointer)
254 m += 4;
255
256 if ((neigh = sprt->rt6i_nexthop) != NULL) {
257 read_lock_bh(&neigh->lock);
258 switch (neigh->nud_state) {
259 case NUD_REACHABLE:
260 m += 3;
261 break;
262
263 case NUD_STALE:
264 case NUD_DELAY:
265 case NUD_PROBE:
266 m += 2;
267 break;
268
269 case NUD_NOARP:
270 case NUD_PERMANENT:
271 m += 1;
272 break;
273
274 case NUD_INCOMPLETE:
275 default:
276 read_unlock_bh(&neigh->lock);
277 continue;
278 }
279 read_unlock_bh(&neigh->lock);
280 } else {
281 continue;
282 }
283
284 if (m > mpri || m >= 12) {
285 match = sprt;
286 mpri = m;
287 if (m >= 12) {
288 /* we choose the last default router if it
289 * is in (probably) reachable state.
290 * If route changed, we should do pmtu
291 * discovery. --yoshfuji
292 */
293 break;
294 }
295 }
296 }
297
298 spin_lock(&rt6_dflt_lock);
299 if (!match) {
300 /*
301 * No default routers are known to be reachable.
302 * SHOULD round robin
303 */
304 if (rt6_dflt_pointer) {
305 for (sprt = rt6_dflt_pointer->u.next;
306 sprt; sprt = sprt->u.next) {
307 if (sprt->u.dst.obsolete <= 0 &&
308 sprt->u.dst.error == 0 &&
309 !rt6_check_expired(sprt)) {
310 match = sprt;
311 break;
312 }
313 }
314 for (sprt = rt;
315 !match && sprt;
316 sprt = sprt->u.next) {
317 if (sprt->u.dst.obsolete <= 0 &&
318 sprt->u.dst.error == 0 &&
319 !rt6_check_expired(sprt)) {
320 match = sprt;
321 break;
322 }
323 if (sprt == rt6_dflt_pointer)
324 break;
325 }
326 }
327 }
328
329 if (match) {
330 if (rt6_dflt_pointer != match)
331 RT6_TRACE("changed default router: %p->%p\n",
332 rt6_dflt_pointer, match);
333 rt6_dflt_pointer = match;
334 }
335 spin_unlock(&rt6_dflt_lock);
336
337 if (!match) {
338 /*
339 * Last Resort: if no default routers found,
340 * use addrconf default route.
341 * We don't record this route.
342 */
343 for (sprt = ip6_routing_table.leaf;
344 sprt; sprt = sprt->u.next) {
345 if (!rt6_check_expired(sprt) &&
346 (sprt->rt6i_flags & RTF_DEFAULT) &&
347 (!oif ||
348 (sprt->rt6i_dev &&
349 sprt->rt6i_dev->ifindex == oif))) {
350 match = sprt;
351 break;
352 }
353 }
354 if (!match) {
355 /* no default route. give up. */
356 match = &ip6_null_entry;
357 }
358 }
359
360 return match;
361}
362
363struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
364 int oif, int strict)
365{
366 struct fib6_node *fn;
367 struct rt6_info *rt;
368
369 read_lock_bh(&rt6_lock);
370 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
371 rt = rt6_device_match(fn->leaf, oif, strict);
372 dst_hold(&rt->u.dst);
373 rt->u.dst.__use++;
374 read_unlock_bh(&rt6_lock);
375
376 rt->u.dst.lastuse = jiffies;
377 if (rt->u.dst.error == 0)
378 return rt;
379 dst_release(&rt->u.dst);
380 return NULL;
381}
382
383/* ip6_ins_rt is called with FREE rt6_lock.
384 It takes new route entry, the addition fails by any reason the
385 route is freed. In any case, if caller does not hold it, it may
386 be destroyed.
387 */
388
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700389int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
390 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
392 int err;
393
394 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700395 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 write_unlock_bh(&rt6_lock);
397
398 return err;
399}
400
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800401static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
402 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 struct rt6_info *rt;
405
406 /*
407 * Clone the route.
408 */
409
410 rt = ip6_rt_copy(ort);
411
412 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900413 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
414 if (rt->rt6i_dst.plen != 128 &&
415 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
416 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900418 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900420 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 rt->rt6i_dst.plen = 128;
422 rt->rt6i_flags |= RTF_CACHE;
423 rt->u.dst.flags |= DST_HOST;
424
425#ifdef CONFIG_IPV6_SUBTREES
426 if (rt->rt6i_src.plen && saddr) {
427 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428 rt->rt6i_src.plen = 128;
429 }
430#endif
431
432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800434 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800436 return rt;
437}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800439static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
440{
441 struct rt6_info *rt = ip6_rt_copy(ort);
442 if (rt) {
443 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
444 rt->rt6i_dst.plen = 128;
445 rt->rt6i_flags |= RTF_CACHE;
446 if (rt->rt6i_flags & RTF_REJECT)
447 rt->u.dst.error = ort->u.dst.error;
448 rt->u.dst.flags |= DST_HOST;
449 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
450 }
451 return rt;
452}
453
/*
 * BACKTRACK() - climb the FIB after a strict lookup hit the null entry.
 *
 * Relies on the caller's scope for the variables rt, fn and strict and for
 * the labels out and restart.  When strict is set and rt is the null entry
 * it walks fn up through its parents: reaching a node flagged RTN_ROOT
 * jumps to out, reaching a node flagged RTN_RTINFO jumps to restart.  If
 * the parents run out, fn is left NULL and execution falls through.
 */
#define BACKTRACK() \
if (strict && rt == &ip6_null_entry) { \
	for (fn = fn->parent; fn != NULL; fn = fn->parent) { \
		if (fn->fn_flags & RTN_ROOT) \
			goto out; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
464
465
466void ip6_route_input(struct sk_buff *skb)
467{
468 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800469 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 int strict;
471 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800472 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
474 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
475
476relookup:
477 read_lock_bh(&rt6_lock);
478
479 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
480 &skb->nh.ipv6h->saddr);
481
482restart:
483 rt = fn->leaf;
484
485 if ((rt->rt6i_flags & RTF_CACHE)) {
486 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
487 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 goto out;
489 }
490
Yan Zheng9d17f212005-10-28 15:12:00 -0700491 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 BACKTRACK();
493
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800494 dst_hold(&rt->u.dst);
495 read_unlock_bh(&rt6_lock);
496
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800497 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
498 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
499 else {
500#if CLONE_OFFLINK_ROUTE
501 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
502#else
503 goto out2;
504#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800507 dst_release(&rt->u.dst);
508 rt = nrt ? : &ip6_null_entry;
509
510 dst_hold(&rt->u.dst);
511 if (nrt) {
512 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
513 if (!err)
514 goto out2;
515 }
516
517 if (--attempts <= 0)
518 goto out2;
519
520 /*
521 * Race condition! In the gap, when rt6_lock was
522 * released someone could insert this route. Relookup.
523 */
524 dst_release(&rt->u.dst);
525 goto relookup;
526
527out:
528 dst_hold(&rt->u.dst);
529 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530out2:
531 rt->u.dst.lastuse = jiffies;
532 rt->u.dst.__use++;
533 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800534 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535}
536
537struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
538{
539 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800540 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 int strict;
542 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800543 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
545 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
546
547relookup:
548 read_lock_bh(&rt6_lock);
549
550 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
551
552restart:
553 rt = fn->leaf;
554
555 if ((rt->rt6i_flags & RTF_CACHE)) {
556 rt = rt6_device_match(rt, fl->oif, strict);
557 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 goto out;
559 }
560 if (rt->rt6i_flags & RTF_DEFAULT) {
561 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
562 rt = rt6_best_dflt(rt, fl->oif);
563 } else {
564 rt = rt6_device_match(rt, fl->oif, strict);
565 BACKTRACK();
566 }
567
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800568 dst_hold(&rt->u.dst);
569 read_unlock_bh(&rt6_lock);
570
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800571 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800572 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800573 else {
574#if CLONE_OFFLINK_ROUTE
575 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
576#else
577 goto out2;
578#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800580
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800581 dst_release(&rt->u.dst);
582 rt = nrt ? : &ip6_null_entry;
583
584 dst_hold(&rt->u.dst);
585 if (nrt) {
586 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
587 if (!err)
588 goto out2;
589 }
590
591 if (--attempts <= 0)
592 goto out2;
593
594 /*
595 * Race condition! In the gap, when rt6_lock was
596 * released someone could insert this route. Relookup.
597 */
598 dst_release(&rt->u.dst);
599 goto relookup;
600
601out:
602 dst_hold(&rt->u.dst);
603 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604out2:
605 rt->u.dst.lastuse = jiffies;
606 rt->u.dst.__use++;
607 return &rt->u.dst;
608}
609
610
611/*
612 * Destination cache support functions
613 */
614
615static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
616{
617 struct rt6_info *rt;
618
619 rt = (struct rt6_info *) dst;
620
621 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
622 return dst;
623
624 return NULL;
625}
626
627static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
628{
629 struct rt6_info *rt = (struct rt6_info *) dst;
630
631 if (rt) {
632 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700633 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 else
635 dst_release(dst);
636 }
637 return NULL;
638}
639
640static void ip6_link_failure(struct sk_buff *skb)
641{
642 struct rt6_info *rt;
643
644 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
645
646 rt = (struct rt6_info *) skb->dst;
647 if (rt) {
648 if (rt->rt6i_flags&RTF_CACHE) {
649 dst_set_expires(&rt->u.dst, 0);
650 rt->rt6i_flags |= RTF_EXPIRES;
651 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
652 rt->rt6i_node->fn_sernum = -1;
653 }
654}
655
656static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
657{
658 struct rt6_info *rt6 = (struct rt6_info*)dst;
659
660 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
661 rt6->rt6i_flags |= RTF_MODIFIED;
662 if (mtu < IPV6_MIN_MTU) {
663 mtu = IPV6_MIN_MTU;
664 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
665 }
666 dst->metrics[RTAX_MTU-1] = mtu;
667 }
668}
669
670/* Protected by rt6_lock. */
671static struct dst_entry *ndisc_dst_gc_list;
672static int ipv6_get_mtu(struct net_device *dev);
673
674static inline unsigned int ipv6_advmss(unsigned int mtu)
675{
676 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
677
678 if (mtu < ip6_rt_min_advmss)
679 mtu = ip6_rt_min_advmss;
680
681 /*
682 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
683 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
684 * IPV6_MAXPLEN is also valid and means: "any MSS,
685 * rely only on pmtu discovery"
686 */
687 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
688 mtu = IPV6_MAXPLEN;
689 return mtu;
690}
691
692struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
693 struct neighbour *neigh,
694 struct in6_addr *addr,
695 int (*output)(struct sk_buff *))
696{
697 struct rt6_info *rt;
698 struct inet6_dev *idev = in6_dev_get(dev);
699
700 if (unlikely(idev == NULL))
701 return NULL;
702
703 rt = ip6_dst_alloc();
704 if (unlikely(rt == NULL)) {
705 in6_dev_put(idev);
706 goto out;
707 }
708
709 dev_hold(dev);
710 if (neigh)
711 neigh_hold(neigh);
712 else
713 neigh = ndisc_get_neigh(dev, addr);
714
715 rt->rt6i_dev = dev;
716 rt->rt6i_idev = idev;
717 rt->rt6i_nexthop = neigh;
718 atomic_set(&rt->u.dst.__refcnt, 1);
719 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
720 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
721 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
722 rt->u.dst.output = output;
723
724#if 0 /* there's no chance to use these for ndisc */
725 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
726 ? DST_HOST
727 : 0;
728 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
729 rt->rt6i_dst.plen = 128;
730#endif
731
732 write_lock_bh(&rt6_lock);
733 rt->u.dst.next = ndisc_dst_gc_list;
734 ndisc_dst_gc_list = &rt->u.dst;
735 write_unlock_bh(&rt6_lock);
736
737 fib6_force_start_gc();
738
739out:
740 return (struct dst_entry *)rt;
741}
742
743int ndisc_dst_gc(int *more)
744{
745 struct dst_entry *dst, *next, **pprev;
746 int freed;
747
748 next = NULL;
749 pprev = &ndisc_dst_gc_list;
750 freed = 0;
751 while ((dst = *pprev) != NULL) {
752 if (!atomic_read(&dst->__refcnt)) {
753 *pprev = dst->next;
754 dst_free(dst);
755 freed++;
756 } else {
757 pprev = &dst->next;
758 (*more)++;
759 }
760 }
761
762 return freed;
763}
764
765static int ip6_dst_gc(void)
766{
767 static unsigned expire = 30*HZ;
768 static unsigned long last_gc;
769 unsigned long now = jiffies;
770
771 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
772 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
773 goto out;
774
775 expire++;
776 fib6_run_gc(expire);
777 last_gc = now;
778 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
779 expire = ip6_rt_gc_timeout>>1;
780
781out:
782 expire -= expire>>ip6_rt_gc_elasticity;
783 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
784}
785
786/* Clean host part of a prefix. Not necessary in radix tree,
787 but results in cleaner routing tables.
788
789 Remove it only when all the things will work!
790 */
791
792static int ipv6_get_mtu(struct net_device *dev)
793{
794 int mtu = IPV6_MIN_MTU;
795 struct inet6_dev *idev;
796
797 idev = in6_dev_get(dev);
798 if (idev) {
799 mtu = idev->cnf.mtu6;
800 in6_dev_put(idev);
801 }
802 return mtu;
803}
804
805int ipv6_get_hoplimit(struct net_device *dev)
806{
807 int hoplimit = ipv6_devconf.hop_limit;
808 struct inet6_dev *idev;
809
810 idev = in6_dev_get(dev);
811 if (idev) {
812 hoplimit = idev->cnf.hop_limit;
813 in6_dev_put(idev);
814 }
815 return hoplimit;
816}
817
818/*
819 *
820 */
821
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700822int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
823 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824{
825 int err;
826 struct rtmsg *r;
827 struct rtattr **rta;
828 struct rt6_info *rt = NULL;
829 struct net_device *dev = NULL;
830 struct inet6_dev *idev = NULL;
831 int addr_type;
832
833 rta = (struct rtattr **) _rtattr;
834
835 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
836 return -EINVAL;
837#ifndef CONFIG_IPV6_SUBTREES
838 if (rtmsg->rtmsg_src_len)
839 return -EINVAL;
840#endif
841 if (rtmsg->rtmsg_ifindex) {
842 err = -ENODEV;
843 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
844 if (!dev)
845 goto out;
846 idev = in6_dev_get(dev);
847 if (!idev)
848 goto out;
849 }
850
851 if (rtmsg->rtmsg_metric == 0)
852 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
853
854 rt = ip6_dst_alloc();
855
856 if (rt == NULL) {
857 err = -ENOMEM;
858 goto out;
859 }
860
861 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800862 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 if (nlh && (r = NLMSG_DATA(nlh))) {
864 rt->rt6i_protocol = r->rtm_protocol;
865 } else {
866 rt->rt6i_protocol = RTPROT_BOOT;
867 }
868
869 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
870
871 if (addr_type & IPV6_ADDR_MULTICAST)
872 rt->u.dst.input = ip6_mc_input;
873 else
874 rt->u.dst.input = ip6_forward;
875
876 rt->u.dst.output = ip6_output;
877
878 ipv6_addr_prefix(&rt->rt6i_dst.addr,
879 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
880 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
881 if (rt->rt6i_dst.plen == 128)
882 rt->u.dst.flags = DST_HOST;
883
884#ifdef CONFIG_IPV6_SUBTREES
885 ipv6_addr_prefix(&rt->rt6i_src.addr,
886 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
887 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
888#endif
889
890 rt->rt6i_metric = rtmsg->rtmsg_metric;
891
892 /* We cannot add true routes via loopback here,
893 they would result in kernel looping; promote them to reject routes
894 */
895 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
896 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
897 /* hold loopback dev/idev if we haven't done so. */
898 if (dev != &loopback_dev) {
899 if (dev) {
900 dev_put(dev);
901 in6_dev_put(idev);
902 }
903 dev = &loopback_dev;
904 dev_hold(dev);
905 idev = in6_dev_get(dev);
906 if (!idev) {
907 err = -ENODEV;
908 goto out;
909 }
910 }
911 rt->u.dst.output = ip6_pkt_discard_out;
912 rt->u.dst.input = ip6_pkt_discard;
913 rt->u.dst.error = -ENETUNREACH;
914 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
915 goto install_route;
916 }
917
918 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
919 struct in6_addr *gw_addr;
920 int gwa_type;
921
922 gw_addr = &rtmsg->rtmsg_gateway;
923 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
924 gwa_type = ipv6_addr_type(gw_addr);
925
926 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
927 struct rt6_info *grt;
928
929 /* IPv6 strictly inhibits using not link-local
930 addresses as nexthop address.
931 Otherwise, router will not able to send redirects.
932 It is very good, but in some (rare!) circumstances
933 (SIT, PtP, NBMA NOARP links) it is handy to allow
934 some exceptions. --ANK
935 */
936 err = -EINVAL;
937 if (!(gwa_type&IPV6_ADDR_UNICAST))
938 goto out;
939
940 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
941
942 err = -EHOSTUNREACH;
943 if (grt == NULL)
944 goto out;
945 if (dev) {
946 if (dev != grt->rt6i_dev) {
947 dst_release(&grt->u.dst);
948 goto out;
949 }
950 } else {
951 dev = grt->rt6i_dev;
952 idev = grt->rt6i_idev;
953 dev_hold(dev);
954 in6_dev_hold(grt->rt6i_idev);
955 }
956 if (!(grt->rt6i_flags&RTF_GATEWAY))
957 err = 0;
958 dst_release(&grt->u.dst);
959
960 if (err)
961 goto out;
962 }
963 err = -EINVAL;
964 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
965 goto out;
966 }
967
968 err = -ENODEV;
969 if (dev == NULL)
970 goto out;
971
972 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
973 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
974 if (IS_ERR(rt->rt6i_nexthop)) {
975 err = PTR_ERR(rt->rt6i_nexthop);
976 rt->rt6i_nexthop = NULL;
977 goto out;
978 }
979 }
980
981 rt->rt6i_flags = rtmsg->rtmsg_flags;
982
983install_route:
984 if (rta && rta[RTA_METRICS-1]) {
985 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
986 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
987
988 while (RTA_OK(attr, attrlen)) {
989 unsigned flavor = attr->rta_type;
990 if (flavor) {
991 if (flavor > RTAX_MAX) {
992 err = -EINVAL;
993 goto out;
994 }
995 rt->u.dst.metrics[flavor-1] =
996 *(u32 *)RTA_DATA(attr);
997 }
998 attr = RTA_NEXT(attr, attrlen);
999 }
1000 }
1001
1002 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1003 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1004 if (!rt->u.dst.metrics[RTAX_MTU-1])
1005 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1006 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1007 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1008 rt->u.dst.dev = dev;
1009 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001010 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012out:
1013 if (dev)
1014 dev_put(dev);
1015 if (idev)
1016 in6_dev_put(idev);
1017 if (rt)
1018 dst_free((struct dst_entry *) rt);
1019 return err;
1020}
1021
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001022int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023{
1024 int err;
1025
1026 write_lock_bh(&rt6_lock);
1027
1028 rt6_reset_dflt_pointer(NULL);
1029
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001030 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 dst_release(&rt->u.dst);
1032
1033 write_unlock_bh(&rt6_lock);
1034
1035 return err;
1036}
1037
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001038static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039{
1040 struct fib6_node *fn;
1041 struct rt6_info *rt;
1042 int err = -ESRCH;
1043
1044 read_lock_bh(&rt6_lock);
1045
1046 fn = fib6_locate(&ip6_routing_table,
1047 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1048 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1049
1050 if (fn) {
1051 for (rt = fn->leaf; rt; rt = rt->u.next) {
1052 if (rtmsg->rtmsg_ifindex &&
1053 (rt->rt6i_dev == NULL ||
1054 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1055 continue;
1056 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1057 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1058 continue;
1059 if (rtmsg->rtmsg_metric &&
1060 rtmsg->rtmsg_metric != rt->rt6i_metric)
1061 continue;
1062 dst_hold(&rt->u.dst);
1063 read_unlock_bh(&rt6_lock);
1064
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001065 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 }
1067 }
1068 read_unlock_bh(&rt6_lock);
1069
1070 return err;
1071}
1072
1073/*
1074 * Handle redirects
1075 */
1076void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1077 struct neighbour *neigh, u8 *lladdr, int on_link)
1078{
1079 struct rt6_info *rt, *nrt;
1080
1081 /* Locate old route to this destination. */
1082 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1083
1084 if (rt == NULL)
1085 return;
1086
1087 if (neigh->dev != rt->rt6i_dev)
1088 goto out;
1089
1090 /*
1091 * Current route is on-link; redirect is always invalid.
1092 *
1093 * Seems, previous statement is not true. It could
1094 * be node, which looks for us as on-link (f.e. proxy ndisc)
1095 * But then router serving it might decide, that we should
1096 * know truth 8)8) --ANK (980726).
1097 */
1098 if (!(rt->rt6i_flags&RTF_GATEWAY))
1099 goto out;
1100
1101 /*
1102 * RFC 2461 specifies that redirects should only be
1103 * accepted if they come from the nexthop to the target.
1104 * Due to the way default routers are chosen, this notion
1105 * is a bit fuzzy and one might need to check all default
1106 * routers.
1107 */
1108 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1109 if (rt->rt6i_flags & RTF_DEFAULT) {
1110 struct rt6_info *rt1;
1111
1112 read_lock(&rt6_lock);
1113 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1114 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1115 dst_hold(&rt1->u.dst);
1116 dst_release(&rt->u.dst);
1117 read_unlock(&rt6_lock);
1118 rt = rt1;
1119 goto source_ok;
1120 }
1121 }
1122 read_unlock(&rt6_lock);
1123 }
1124 if (net_ratelimit())
1125 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1126 "for redirect target\n");
1127 goto out;
1128 }
1129
1130source_ok:
1131
1132 /*
1133 * We have finally decided to accept it.
1134 */
1135
1136 neigh_update(neigh, lladdr, NUD_STALE,
1137 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1138 NEIGH_UPDATE_F_OVERRIDE|
1139 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1140 NEIGH_UPDATE_F_ISROUTER))
1141 );
1142
1143 /*
1144 * Redirect received -> path was valid.
1145 * Look, redirects are sent only in response to data packets,
1146 * so that this nexthop apparently is reachable. --ANK
1147 */
1148 dst_confirm(&rt->u.dst);
1149
1150 /* Duplicate redirect: silently ignore. */
1151 if (neigh == rt->u.dst.neighbour)
1152 goto out;
1153
1154 nrt = ip6_rt_copy(rt);
1155 if (nrt == NULL)
1156 goto out;
1157
1158 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1159 if (on_link)
1160 nrt->rt6i_flags &= ~RTF_GATEWAY;
1161
1162 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1163 nrt->rt6i_dst.plen = 128;
1164 nrt->u.dst.flags |= DST_HOST;
1165
1166 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1167 nrt->rt6i_nexthop = neigh_clone(neigh);
1168 /* Reset pmtu, it may be better */
1169 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1170 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1171
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001172 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 goto out;
1174
1175 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001176 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return;
1178 }
1179
1180out:
1181 dst_release(&rt->u.dst);
1182 return;
1183}
1184
1185/*
1186 * Handle ICMP "packet too big" messages
1187 * i.e. Path MTU discovery
1188 */
1189
1190void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1191 struct net_device *dev, u32 pmtu)
1192{
1193 struct rt6_info *rt, *nrt;
1194 int allfrag = 0;
1195
1196 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1197 if (rt == NULL)
1198 return;
1199
1200 if (pmtu >= dst_mtu(&rt->u.dst))
1201 goto out;
1202
1203 if (pmtu < IPV6_MIN_MTU) {
1204 /*
1205 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1206 * MTU (1280) and a fragment header should always be included
1207 * after a node receiving Too Big message reporting PMTU is
1208 * less than the IPv6 Minimum Link MTU.
1209 */
1210 pmtu = IPV6_MIN_MTU;
1211 allfrag = 1;
1212 }
1213
1214 /* New mtu received -> path was valid.
1215 They are sent only in response to data packets,
1216 so that this nexthop apparently is reachable. --ANK
1217 */
1218 dst_confirm(&rt->u.dst);
1219
1220 /* Host route. If it is static, it would be better
1221 not to override it, but add new one, so that
1222 when cache entry will expire old pmtu
1223 would return automatically.
1224 */
1225 if (rt->rt6i_flags & RTF_CACHE) {
1226 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1227 if (allfrag)
1228 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1229 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1230 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1231 goto out;
1232 }
1233
1234 /* Network route.
1235 Two cases are possible:
1236 1. It is connected route. Action: COW
1237 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1238 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001239 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001240 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001241 else
1242 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001243
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001244 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001245 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1246 if (allfrag)
1247 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1248
1249 /* According to RFC 1981, detecting PMTU increase shouldn't be
1250 * happened within 5 mins, the recommended timer is 10 mins.
1251 * Here this route expiration time is set to ip6_rt_mtu_expires
1252 * which is 10 mins. After 10 mins the decreased pmtu is expired
1253 * and detecting PMTU increase will be automatically happened.
1254 */
1255 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1256 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1257
1258 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260out:
1261 dst_release(&rt->u.dst);
1262}
1263
1264/*
1265 * Misc support functions
1266 */
1267
1268static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1269{
1270 struct rt6_info *rt = ip6_dst_alloc();
1271
1272 if (rt) {
1273 rt->u.dst.input = ort->u.dst.input;
1274 rt->u.dst.output = ort->u.dst.output;
1275
1276 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1277 rt->u.dst.dev = ort->u.dst.dev;
1278 if (rt->u.dst.dev)
1279 dev_hold(rt->u.dst.dev);
1280 rt->rt6i_idev = ort->rt6i_idev;
1281 if (rt->rt6i_idev)
1282 in6_dev_hold(rt->rt6i_idev);
1283 rt->u.dst.lastuse = jiffies;
1284 rt->rt6i_expires = 0;
1285
1286 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1287 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1288 rt->rt6i_metric = 0;
1289
1290 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1291#ifdef CONFIG_IPV6_SUBTREES
1292 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1293#endif
1294 }
1295 return rt;
1296}
1297
1298struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1299{
1300 struct rt6_info *rt;
1301 struct fib6_node *fn;
1302
1303 fn = &ip6_routing_table;
1304
1305 write_lock_bh(&rt6_lock);
1306 for (rt = fn->leaf; rt; rt=rt->u.next) {
1307 if (dev == rt->rt6i_dev &&
1308 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1309 break;
1310 }
1311 if (rt)
1312 dst_hold(&rt->u.dst);
1313 write_unlock_bh(&rt6_lock);
1314 return rt;
1315}
1316
1317struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1318 struct net_device *dev)
1319{
1320 struct in6_rtmsg rtmsg;
1321
1322 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1323 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1324 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1325 rtmsg.rtmsg_metric = 1024;
1326 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1327
1328 rtmsg.rtmsg_ifindex = dev->ifindex;
1329
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001330 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 return rt6_get_dflt_router(gwaddr, dev);
1332}
1333
1334void rt6_purge_dflt_routers(void)
1335{
1336 struct rt6_info *rt;
1337
1338restart:
1339 read_lock_bh(&rt6_lock);
1340 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1341 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1342 dst_hold(&rt->u.dst);
1343
1344 rt6_reset_dflt_pointer(NULL);
1345
1346 read_unlock_bh(&rt6_lock);
1347
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001348 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
1350 goto restart;
1351 }
1352 }
1353 read_unlock_bh(&rt6_lock);
1354}
1355
1356int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1357{
1358 struct in6_rtmsg rtmsg;
1359 int err;
1360
1361 switch(cmd) {
1362 case SIOCADDRT: /* Add a route */
1363 case SIOCDELRT: /* Delete a route */
1364 if (!capable(CAP_NET_ADMIN))
1365 return -EPERM;
1366 err = copy_from_user(&rtmsg, arg,
1367 sizeof(struct in6_rtmsg));
1368 if (err)
1369 return -EFAULT;
1370
1371 rtnl_lock();
1372 switch (cmd) {
1373 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001374 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 break;
1376 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001377 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 break;
1379 default:
1380 err = -EINVAL;
1381 }
1382 rtnl_unlock();
1383
1384 return err;
1385 };
1386
1387 return -EINVAL;
1388}
1389
1390/*
1391 * Drop the packet on the floor
1392 */
1393
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001394static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395{
1396 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1397 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1398 kfree_skb(skb);
1399 return 0;
1400}
1401
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001402static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403{
1404 skb->dev = skb->dst->dev;
1405 return ip6_pkt_discard(skb);
1406}
1407
1408/*
1409 * Allocate a dst for local (unicast / anycast) address.
1410 */
1411
1412struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1413 const struct in6_addr *addr,
1414 int anycast)
1415{
1416 struct rt6_info *rt = ip6_dst_alloc();
1417
1418 if (rt == NULL)
1419 return ERR_PTR(-ENOMEM);
1420
1421 dev_hold(&loopback_dev);
1422 in6_dev_hold(idev);
1423
1424 rt->u.dst.flags = DST_HOST;
1425 rt->u.dst.input = ip6_input;
1426 rt->u.dst.output = ip6_output;
1427 rt->rt6i_dev = &loopback_dev;
1428 rt->rt6i_idev = idev;
1429 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1430 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1431 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1432 rt->u.dst.obsolete = -1;
1433
1434 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001435 if (anycast)
1436 rt->rt6i_flags |= RTF_ANYCAST;
1437 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 rt->rt6i_flags |= RTF_LOCAL;
1439 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1440 if (rt->rt6i_nexthop == NULL) {
1441 dst_free((struct dst_entry *) rt);
1442 return ERR_PTR(-ENOMEM);
1443 }
1444
1445 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1446 rt->rt6i_dst.plen = 128;
1447
1448 atomic_set(&rt->u.dst.__refcnt, 1);
1449
1450 return rt;
1451}
1452
1453static int fib6_ifdown(struct rt6_info *rt, void *arg)
1454{
1455 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1456 rt != &ip6_null_entry) {
1457 RT6_TRACE("deleted by ifdown %p\n", rt);
1458 return -1;
1459 }
1460 return 0;
1461}
1462
1463void rt6_ifdown(struct net_device *dev)
1464{
1465 write_lock_bh(&rt6_lock);
1466 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1467 write_unlock_bh(&rt6_lock);
1468}
1469
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU of that device */
};
1475
1476static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1477{
1478 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1479 struct inet6_dev *idev;
1480
1481 /* In IPv6 pmtu discovery is not optional,
1482 so that RTAX_MTU lock cannot disable it.
1483 We still use this lock to block changes
1484 caused by addrconf/ndisc.
1485 */
1486
1487 idev = __in6_dev_get(arg->dev);
1488 if (idev == NULL)
1489 return 0;
1490
1491 /* For administrative MTU increase, there is no way to discover
1492 IPv6 PMTU increase, so PMTU increase should be updated here.
1493 Since RFC 1981 doesn't include administrative MTU increase
1494 update PMTU increase is a MUST. (i.e. jumbo frame)
1495 */
1496 /*
1497 If new MTU is less than route PMTU, this new MTU will be the
1498 lowest MTU in the path, update the route PMTU to reflect PMTU
1499 decreases; if new MTU is greater than route PMTU, and the
1500 old MTU is the lowest MTU in the path, update the route PMTU
1501 to reflect the increase. In this case if the other nodes' MTU
1502 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1503 PMTU discouvery.
1504 */
1505 if (rt->rt6i_dev == arg->dev &&
1506 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1507 (dst_mtu(&rt->u.dst) > arg->mtu ||
1508 (dst_mtu(&rt->u.dst) < arg->mtu &&
1509 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1510 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1511 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1512 return 0;
1513}
1514
1515void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1516{
1517 struct rt6_mtu_change_arg arg;
1518
1519 arg.dev = dev;
1520 arg.mtu = mtu;
1521 read_lock_bh(&rt6_lock);
1522 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1523 read_unlock_bh(&rt6_lock);
1524}
1525
1526static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1527 struct in6_rtmsg *rtmsg)
1528{
1529 memset(rtmsg, 0, sizeof(*rtmsg));
1530
1531 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1532 rtmsg->rtmsg_src_len = r->rtm_src_len;
1533 rtmsg->rtmsg_flags = RTF_UP;
1534 if (r->rtm_type == RTN_UNREACHABLE)
1535 rtmsg->rtmsg_flags |= RTF_REJECT;
1536
1537 if (rta[RTA_GATEWAY-1]) {
1538 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1539 return -EINVAL;
1540 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1541 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1542 }
1543 if (rta[RTA_DST-1]) {
1544 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1545 return -EINVAL;
1546 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1547 }
1548 if (rta[RTA_SRC-1]) {
1549 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1550 return -EINVAL;
1551 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1552 }
1553 if (rta[RTA_OIF-1]) {
1554 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1555 return -EINVAL;
1556 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1557 }
1558 if (rta[RTA_PRIORITY-1]) {
1559 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1560 return -EINVAL;
1561 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1562 }
1563 return 0;
1564}
1565
1566int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1567{
1568 struct rtmsg *r = NLMSG_DATA(nlh);
1569 struct in6_rtmsg rtmsg;
1570
1571 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1572 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001573 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574}
1575
1576int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1577{
1578 struct rtmsg *r = NLMSG_DATA(nlh);
1579 struct in6_rtmsg rtmsg;
1580
1581 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1582 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001583 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584}
1585
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* dump request being served */
};
1591
1592static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001593 struct in6_addr *dst, struct in6_addr *src,
1594 int iif, int type, u32 pid, u32 seq,
1595 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596{
1597 struct rtmsg *rtm;
1598 struct nlmsghdr *nlh;
1599 unsigned char *b = skb->tail;
1600 struct rta_cacheinfo ci;
1601
1602 if (prefix) { /* user wants prefix routes only */
1603 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1604 /* success since this is not a prefix route */
1605 return 1;
1606 }
1607 }
1608
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001609 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 rtm = NLMSG_DATA(nlh);
1611 rtm->rtm_family = AF_INET6;
1612 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1613 rtm->rtm_src_len = rt->rt6i_src.plen;
1614 rtm->rtm_tos = 0;
1615 rtm->rtm_table = RT_TABLE_MAIN;
1616 if (rt->rt6i_flags&RTF_REJECT)
1617 rtm->rtm_type = RTN_UNREACHABLE;
1618 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1619 rtm->rtm_type = RTN_LOCAL;
1620 else
1621 rtm->rtm_type = RTN_UNICAST;
1622 rtm->rtm_flags = 0;
1623 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1624 rtm->rtm_protocol = rt->rt6i_protocol;
1625 if (rt->rt6i_flags&RTF_DYNAMIC)
1626 rtm->rtm_protocol = RTPROT_REDIRECT;
1627 else if (rt->rt6i_flags & RTF_ADDRCONF)
1628 rtm->rtm_protocol = RTPROT_KERNEL;
1629 else if (rt->rt6i_flags&RTF_DEFAULT)
1630 rtm->rtm_protocol = RTPROT_RA;
1631
1632 if (rt->rt6i_flags&RTF_CACHE)
1633 rtm->rtm_flags |= RTM_F_CLONED;
1634
1635 if (dst) {
1636 RTA_PUT(skb, RTA_DST, 16, dst);
1637 rtm->rtm_dst_len = 128;
1638 } else if (rtm->rtm_dst_len)
1639 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1640#ifdef CONFIG_IPV6_SUBTREES
1641 if (src) {
1642 RTA_PUT(skb, RTA_SRC, 16, src);
1643 rtm->rtm_src_len = 128;
1644 } else if (rtm->rtm_src_len)
1645 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1646#endif
1647 if (iif)
1648 RTA_PUT(skb, RTA_IIF, 4, &iif);
1649 else if (dst) {
1650 struct in6_addr saddr_buf;
1651 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1652 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1653 }
1654 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1655 goto rtattr_failure;
1656 if (rt->u.dst.neighbour)
1657 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1658 if (rt->u.dst.dev)
1659 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1660 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1661 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1662 if (rt->rt6i_expires)
1663 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1664 else
1665 ci.rta_expires = 0;
1666 ci.rta_used = rt->u.dst.__use;
1667 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1668 ci.rta_error = rt->u.dst.error;
1669 ci.rta_id = 0;
1670 ci.rta_ts = 0;
1671 ci.rta_tsage = 0;
1672 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1673 nlh->nlmsg_len = skb->tail - b;
1674 return skb->len;
1675
1676nlmsg_failure:
1677rtattr_failure:
1678 skb_trim(skb, b - skb->data);
1679 return -1;
1680}
1681
1682static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1683{
1684 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1685 int prefix;
1686
1687 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1688 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1689 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1690 } else
1691 prefix = 0;
1692
1693 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1694 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001695 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696}
1697
1698static int fib6_dump_node(struct fib6_walker_t *w)
1699{
1700 int res;
1701 struct rt6_info *rt;
1702
1703 for (rt = w->leaf; rt; rt = rt->u.next) {
1704 res = rt6_dump_route(rt, w->args);
1705 if (res < 0) {
1706 /* Frame is full, suspend walking */
1707 w->leaf = rt;
1708 return 1;
1709 }
1710 BUG_TRAP(res!=0);
1711 }
1712 w->leaf = NULL;
1713 return 0;
1714}
1715
1716static void fib6_dump_end(struct netlink_callback *cb)
1717{
1718 struct fib6_walker_t *w = (void*)cb->args[0];
1719
1720 if (w) {
1721 cb->args[0] = 0;
1722 fib6_walker_unlink(w);
1723 kfree(w);
1724 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001725 cb->done = (void*)cb->args[1];
1726 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727}
1728
1729static int fib6_dump_done(struct netlink_callback *cb)
1730{
1731 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001732 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733}
1734
1735int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1736{
1737 struct rt6_rtnl_dump_arg arg;
1738 struct fib6_walker_t *w;
1739 int res;
1740
1741 arg.skb = skb;
1742 arg.cb = cb;
1743
1744 w = (void*)cb->args[0];
1745 if (w == NULL) {
1746 /* New dump:
1747 *
1748 * 1. hook callback destructor.
1749 */
1750 cb->args[1] = (long)cb->done;
1751 cb->done = fib6_dump_done;
1752
1753 /*
1754 * 2. allocate and initialize walker.
1755 */
David S. Miller9e147a12005-11-17 16:52:51 -08001756 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 if (w == NULL)
1758 return -ENOMEM;
1759 RT6_TRACE("dump<%p", w);
1760 memset(w, 0, sizeof(*w));
1761 w->root = &ip6_routing_table;
1762 w->func = fib6_dump_node;
1763 w->args = &arg;
1764 cb->args[0] = (long)w;
1765 read_lock_bh(&rt6_lock);
1766 res = fib6_walk(w);
1767 read_unlock_bh(&rt6_lock);
1768 } else {
1769 w->args = &arg;
1770 read_lock_bh(&rt6_lock);
1771 res = fib6_walk_continue(w);
1772 read_unlock_bh(&rt6_lock);
1773 }
1774#if RT6_DEBUG >= 3
1775 if (res <= 0 && skb->len == 0)
1776 RT6_TRACE("%p>dump end\n", w);
1777#endif
1778 res = res < 0 ? res : skb->len;
1779 /* res < 0 is an error. (really, impossible)
1780 res == 0 means that dump is complete, but skb still can contain data.
1781 res > 0 dump is not complete, but frame is full.
1782 */
1783 /* Destroy walker, if dump of this table is complete. */
1784 if (res <= 0)
1785 fib6_dump_end(cb);
1786 return res;
1787}
1788
1789int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1790{
1791 struct rtattr **rta = arg;
1792 int iif = 0;
1793 int err = -ENOBUFS;
1794 struct sk_buff *skb;
1795 struct flowi fl;
1796 struct rt6_info *rt;
1797
1798 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1799 if (skb == NULL)
1800 goto out;
1801
1802 /* Reserve room for dummy headers, this skb can pass
1803 through good chunk of routing engine.
1804 */
1805 skb->mac.raw = skb->data;
1806 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1807
1808 memset(&fl, 0, sizeof(fl));
1809 if (rta[RTA_SRC-1])
1810 ipv6_addr_copy(&fl.fl6_src,
1811 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1812 if (rta[RTA_DST-1])
1813 ipv6_addr_copy(&fl.fl6_dst,
1814 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1815
1816 if (rta[RTA_IIF-1])
1817 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1818
1819 if (iif) {
1820 struct net_device *dev;
1821 dev = __dev_get_by_index(iif);
1822 if (!dev) {
1823 err = -ENODEV;
1824 goto out_free;
1825 }
1826 }
1827
1828 fl.oif = 0;
1829 if (rta[RTA_OIF-1])
1830 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1831
1832 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1833
1834 skb->dst = &rt->u.dst;
1835
1836 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1837 err = rt6_fill_node(skb, rt,
1838 &fl.fl6_dst, &fl.fl6_src,
1839 iif,
1840 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001841 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842 if (err < 0) {
1843 err = -EMSGSIZE;
1844 goto out_free;
1845 }
1846
1847 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1848 if (err > 0)
1849 err = 0;
1850out:
1851 return err;
1852out_free:
1853 kfree_skb(skb);
1854 goto out;
1855}
1856
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001857void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1858 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859{
1860 struct sk_buff *skb;
1861 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001862 u32 pid = current->pid;
1863 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001865 if (req)
1866 pid = req->pid;
1867 if (nlh)
1868 seq = nlh->nlmsg_seq;
1869
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 skb = alloc_skb(size, gfp_any());
1871 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001872 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 return;
1874 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001875 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001877 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 return;
1879 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001880 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1881 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882}
1883
1884/*
1885 * /proc
1886 */
1887
1888#ifdef CONFIG_PROC_FS
1889
/*
 * Length of one /proc/net/ipv6_route record: three 32-digit hex
 * addresses, two 4-character prefix-length fields, and a 46-character
 * tail holding four numeric fields, the device name and the newline.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* requested read offset */
	int length;	/* requested read length */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1900
1901static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1902{
1903 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1904 int i;
1905
1906 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1907 arg->skip++;
1908 return 0;
1909 }
1910
1911 if (arg->len >= arg->length)
1912 return 0;
1913
1914 for (i=0; i<16; i++) {
1915 sprintf(arg->buffer + arg->len, "%02x",
1916 rt->rt6i_dst.addr.s6_addr[i]);
1917 arg->len += 2;
1918 }
1919 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1920 rt->rt6i_dst.plen);
1921
1922#ifdef CONFIG_IPV6_SUBTREES
1923 for (i=0; i<16; i++) {
1924 sprintf(arg->buffer + arg->len, "%02x",
1925 rt->rt6i_src.addr.s6_addr[i]);
1926 arg->len += 2;
1927 }
1928 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1929 rt->rt6i_src.plen);
1930#else
1931 sprintf(arg->buffer + arg->len,
1932 "00000000000000000000000000000000 00 ");
1933 arg->len += 36;
1934#endif
1935
1936 if (rt->rt6i_nexthop) {
1937 for (i=0; i<16; i++) {
1938 sprintf(arg->buffer + arg->len, "%02x",
1939 rt->rt6i_nexthop->primary_key[i]);
1940 arg->len += 2;
1941 }
1942 } else {
1943 sprintf(arg->buffer + arg->len,
1944 "00000000000000000000000000000000");
1945 arg->len += 32;
1946 }
1947 arg->len += sprintf(arg->buffer + arg->len,
1948 " %08x %08x %08x %08x %8s\n",
1949 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1950 rt->u.dst.__use, rt->rt6i_flags,
1951 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1952 return 0;
1953}
1954
1955static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1956{
1957 struct rt6_proc_arg arg;
1958 arg.buffer = buffer;
1959 arg.offset = offset;
1960 arg.length = length;
1961 arg.skip = 0;
1962 arg.len = 0;
1963
1964 read_lock_bh(&rt6_lock);
1965 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1966 read_unlock_bh(&rt6_lock);
1967
1968 *start = buffer;
1969 if (offset)
1970 *start += offset % RT6_INFO_LEN;
1971
1972 arg.len -= offset % RT6_INFO_LEN;
1973
1974 if (arg.len > length)
1975 arg.len = length;
1976 if (arg.len < 0)
1977 arg.len = 0;
1978
1979 return arg.len;
1980}
1981
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1983{
1984 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1985 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1986 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1987 rt6_stats.fib_rt_cache,
1988 atomic_read(&ip6_dst_ops.entries),
1989 rt6_stats.fib_discarded_routes);
1990
1991 return 0;
1992}
1993
1994static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1995{
1996 return single_open(file, rt6_stats_seq_show, NULL);
1997}
1998
1999static struct file_operations rt6_stats_seq_fops = {
2000 .owner = THIS_MODULE,
2001 .open = rt6_stats_seq_open,
2002 .read = seq_read,
2003 .llseek = seq_lseek,
2004 .release = single_release,
2005};
2006#endif /* CONFIG_PROC_FS */
2007
2008#ifdef CONFIG_SYSCTL
2009
2010static int flush_delay;
2011
2012static
2013int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2014 void __user *buffer, size_t *lenp, loff_t *ppos)
2015{
2016 if (write) {
2017 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2018 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2019 return 0;
2020 } else
2021 return -EINVAL;
2022}
2023
2024ctl_table ipv6_route_table[] = {
2025 {
2026 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2027 .procname = "flush",
2028 .data = &flush_delay,
2029 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002030 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002031 .proc_handler = &ipv6_sysctl_rtcache_flush
2032 },
2033 {
2034 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2035 .procname = "gc_thresh",
2036 .data = &ip6_dst_ops.gc_thresh,
2037 .maxlen = sizeof(int),
2038 .mode = 0644,
2039 .proc_handler = &proc_dointvec,
2040 },
2041 {
2042 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2043 .procname = "max_size",
2044 .data = &ip6_rt_max_size,
2045 .maxlen = sizeof(int),
2046 .mode = 0644,
2047 .proc_handler = &proc_dointvec,
2048 },
2049 {
2050 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2051 .procname = "gc_min_interval",
2052 .data = &ip6_rt_gc_min_interval,
2053 .maxlen = sizeof(int),
2054 .mode = 0644,
2055 .proc_handler = &proc_dointvec_jiffies,
2056 .strategy = &sysctl_jiffies,
2057 },
2058 {
2059 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2060 .procname = "gc_timeout",
2061 .data = &ip6_rt_gc_timeout,
2062 .maxlen = sizeof(int),
2063 .mode = 0644,
2064 .proc_handler = &proc_dointvec_jiffies,
2065 .strategy = &sysctl_jiffies,
2066 },
2067 {
2068 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2069 .procname = "gc_interval",
2070 .data = &ip6_rt_gc_interval,
2071 .maxlen = sizeof(int),
2072 .mode = 0644,
2073 .proc_handler = &proc_dointvec_jiffies,
2074 .strategy = &sysctl_jiffies,
2075 },
2076 {
2077 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2078 .procname = "gc_elasticity",
2079 .data = &ip6_rt_gc_elasticity,
2080 .maxlen = sizeof(int),
2081 .mode = 0644,
2082 .proc_handler = &proc_dointvec_jiffies,
2083 .strategy = &sysctl_jiffies,
2084 },
2085 {
2086 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2087 .procname = "mtu_expires",
2088 .data = &ip6_rt_mtu_expires,
2089 .maxlen = sizeof(int),
2090 .mode = 0644,
2091 .proc_handler = &proc_dointvec_jiffies,
2092 .strategy = &sysctl_jiffies,
2093 },
2094 {
2095 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2096 .procname = "min_adv_mss",
2097 .data = &ip6_rt_min_advmss,
2098 .maxlen = sizeof(int),
2099 .mode = 0644,
2100 .proc_handler = &proc_dointvec_jiffies,
2101 .strategy = &sysctl_jiffies,
2102 },
2103 {
2104 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2105 .procname = "gc_min_interval_ms",
2106 .data = &ip6_rt_gc_min_interval,
2107 .maxlen = sizeof(int),
2108 .mode = 0644,
2109 .proc_handler = &proc_dointvec_ms_jiffies,
2110 .strategy = &sysctl_ms_jiffies,
2111 },
2112 { .ctl_name = 0 }
2113};
2114
2115#endif
2116
2117void __init ip6_route_init(void)
2118{
2119 struct proc_dir_entry *p;
2120
2121 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2122 sizeof(struct rt6_info),
2123 0, SLAB_HWCACHE_ALIGN,
2124 NULL, NULL);
2125 if (!ip6_dst_ops.kmem_cachep)
2126 panic("cannot create ip6_dst_cache");
2127
2128 fib6_init();
2129#ifdef CONFIG_PROC_FS
2130 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2131 if (p)
2132 p->owner = THIS_MODULE;
2133
2134 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2135#endif
2136#ifdef CONFIG_XFRM
2137 xfrm6_init();
2138#endif
2139}
2140
2141void ip6_route_cleanup(void)
2142{
2143#ifdef CONFIG_PROC_FS
2144 proc_net_remove("ipv6_route");
2145 proc_net_remove("rt6_stats");
2146#endif
2147#ifdef CONFIG_XFRM
2148 xfrm6_fini();
2149#endif
2150 rt6_ifdown(NULL);
2151 fib6_gc_cleanup();
2152 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2153}