blob: 061a7bba163a3294d3cf39926cefd89a8e1c54ed [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  At level 3 and above RDBG() and
 * RT6_TRACE() print through printk(); below that they expand to nothing,
 * so their arguments are never evaluated.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97static struct dst_ops ip6_dst_ops = {
98 .family = AF_INET6,
99 .protocol = __constant_htons(ETH_P_IPV6),
100 .gc = ip6_dst_gc,
101 .gc_thresh = 1024,
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
109};
110
111struct rt6_info ip6_null_entry = {
112 .u = {
113 .dst = {
114 .__refcnt = ATOMIC_INIT(1),
115 .__use = 1,
116 .dev = &loopback_dev,
117 .obsolete = -1,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
122 .ops = &ip6_dst_ops,
123 .path = (struct dst_entry*)&ip6_null_entry,
124 }
125 },
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
129};
130
131struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134};
135
136/* Protects all the ip6 fib */
137
138DEFINE_RWLOCK(rt6_lock);
139
140
141/* allocate dst with ip6_dst_ops */
142static __inline__ struct rt6_info *ip6_dst_alloc(void)
143{
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145}
146
147static void ip6_dst_destroy(struct dst_entry *dst)
148{
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
151
152 if (idev != NULL) {
153 rt->rt6i_idev = NULL;
154 in6_dev_put(idev);
155 }
156}
157
158static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159 int how)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
168 in6_dev_put(idev);
169 }
170 }
171}
172
173static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174{
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
177}
178
179/*
180 * Route lookup. Any rt6_lock is implied.
181 */
182
183static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184 int oif,
185 int strict)
186{
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
189
190 if (oif) {
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
194 return sprt;
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
198 if (strict && oif)
199 continue;
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
202 continue;
203 }
204 local = sprt;
205 }
206 }
207
208 if (local)
209 return local;
210
211 if (strict)
212 return &ip6_null_entry;
213 }
214 return rt;
215}
216
217/*
218 * pointer to the last default router chosen. BH is disabled locally.
219 */
220static struct rt6_info *rt6_dflt_pointer;
221static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223void rt6_reset_dflt_pointer(struct rt6_info *rt)
224{
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
229 }
230 spin_unlock_bh(&rt6_dflt_lock);
231}
232
233/* Default Router Selection (RFC 2461 6.3.6) */
234static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235{
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
238 int mpri = 0;
239
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
242 int m = 0;
243
244 if (!oif ||
245 (sprt->rt6i_dev &&
246 sprt->rt6i_dev->ifindex == oif))
247 m += 8;
248
249 if (rt6_check_expired(sprt))
250 continue;
251
252 if (sprt == rt6_dflt_pointer)
253 m += 4;
254
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
258 case NUD_REACHABLE:
259 m += 3;
260 break;
261
262 case NUD_STALE:
263 case NUD_DELAY:
264 case NUD_PROBE:
265 m += 2;
266 break;
267
268 case NUD_NOARP:
269 case NUD_PERMANENT:
270 m += 1;
271 break;
272
273 case NUD_INCOMPLETE:
274 default:
275 read_unlock_bh(&neigh->lock);
276 continue;
277 }
278 read_unlock_bh(&neigh->lock);
279 } else {
280 continue;
281 }
282
283 if (m > mpri || m >= 12) {
284 match = sprt;
285 mpri = m;
286 if (m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
291 */
292 break;
293 }
294 }
295 }
296
297 spin_lock(&rt6_dflt_lock);
298 if (!match) {
299 /*
300 * No default routers are known to be reachable.
301 * SHOULD round robin
302 */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
309 match = sprt;
310 break;
311 }
312 }
313 for (sprt = rt;
314 !match && sprt;
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
319 match = sprt;
320 break;
321 }
322 if (sprt == rt6_dflt_pointer)
323 break;
324 }
325 }
326 }
327
328 if (match) {
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
333 }
334 spin_unlock(&rt6_dflt_lock);
335
336 if (!match) {
337 /*
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
341 */
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
346 (!oif ||
347 (sprt->rt6i_dev &&
348 sprt->rt6i_dev->ifindex == oif))) {
349 match = sprt;
350 break;
351 }
352 }
353 if (!match) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
356 }
357 }
358
359 return match;
360}
361
362struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363 int oif, int strict)
364{
365 struct fib6_node *fn;
366 struct rt6_info *rt;
367
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
372 rt->u.dst.__use++;
373 read_unlock_bh(&rt6_lock);
374
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
377 return rt;
378 dst_release(&rt->u.dst);
379 return NULL;
380}
381
382/* ip6_ins_rt is called with FREE rt6_lock.
383 It takes new route entry, the addition fails by any reason the
384 route is freed. In any case, if caller does not hold it, it may
385 be destroyed.
386 */
387
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700388int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390{
391 int err;
392
393 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 write_unlock_bh(&rt6_lock);
396
397 return err;
398}
399
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800400static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
401 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 struct rt6_info *rt;
404
405 /*
406 * Clone the route.
407 */
408
409 rt = ip6_rt_copy(ort);
410
411 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900412 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
413 if (rt->rt6i_dst.plen != 128 &&
414 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
415 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900417 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900419 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 rt->rt6i_dst.plen = 128;
421 rt->rt6i_flags |= RTF_CACHE;
422 rt->u.dst.flags |= DST_HOST;
423
424#ifdef CONFIG_IPV6_SUBTREES
425 if (rt->rt6i_src.plen && saddr) {
426 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 rt->rt6i_src.plen = 128;
428 }
429#endif
430
431 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800433 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800435 return rt;
436}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800438static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
439{
440 struct rt6_info *rt = ip6_rt_copy(ort);
441 if (rt) {
442 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
443 rt->rt6i_dst.plen = 128;
444 rt->rt6i_flags |= RTF_CACHE;
445 if (rt->rt6i_flags & RTF_REJECT)
446 rt->u.dst.error = ort->u.dst.error;
447 rt->u.dst.flags |= DST_HOST;
448 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
449 }
450 return rt;
451}
452
/*
 * BACKTRACK() - retry a strict lookup higher up the FIB tree.
 *
 * Expects these names in the calling function: variables 'rt', 'fn' and
 * 'strict', and labels 'restart' and 'out'.  When the current result is
 * the null entry and the lookup is strict, 'fn' is walked towards the
 * root: reaching a node flagged RTN_ROOT jumps to 'out', reaching a node
 * flagged RTN_RTINFO jumps to 'restart'.  In every other case control
 * simply falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and stays safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) \
				goto out; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
463
464
465void ip6_route_input(struct sk_buff *skb)
466{
467 struct fib6_node *fn;
468 struct rt6_info *rt;
469 int strict;
470 int attempts = 3;
471
472 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
473
474relookup:
475 read_lock_bh(&rt6_lock);
476
477 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478 &skb->nh.ipv6h->saddr);
479
480restart:
481 rt = fn->leaf;
482
483 if ((rt->rt6i_flags & RTF_CACHE)) {
484 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
485 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 goto out;
487 }
488
Yan Zheng9d17f212005-10-28 15:12:00 -0700489 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 BACKTRACK();
491
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800492 dst_hold(&rt->u.dst);
493 read_unlock_bh(&rt6_lock);
494
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
496 struct rt6_info *nrt;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800497 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800499 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr,
500 &skb->nh.ipv6h->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
502 dst_release(&rt->u.dst);
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800503 rt = nrt ? : &ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800505 dst_hold(&rt->u.dst);
506 if (nrt) {
507 err = ip6_ins_rt(nrt, NULL, NULL,
508 &NETLINK_CB(skb));
509 if (!err)
510 goto out2;
511 }
512
513 if (--attempts <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 goto out2;
515
516 /* Race condition! In the gap, when rt6_lock was
517 released someone could insert this route. Relookup.
518 */
519 dst_release(&rt->u.dst);
520 goto relookup;
521 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523out2:
524 rt->u.dst.lastuse = jiffies;
525 rt->u.dst.__use++;
526 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800527 return;
528out:
529 dst_hold(&rt->u.dst);
530 read_unlock_bh(&rt6_lock);
531 goto out2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532}
533
534struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
535{
536 struct fib6_node *fn;
537 struct rt6_info *rt;
538 int strict;
539 int attempts = 3;
540
541 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
542
543relookup:
544 read_lock_bh(&rt6_lock);
545
546 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
547
548restart:
549 rt = fn->leaf;
550
551 if ((rt->rt6i_flags & RTF_CACHE)) {
552 rt = rt6_device_match(rt, fl->oif, strict);
553 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 goto out;
555 }
556 if (rt->rt6i_flags & RTF_DEFAULT) {
557 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
558 rt = rt6_best_dflt(rt, fl->oif);
559 } else {
560 rt = rt6_device_match(rt, fl->oif, strict);
561 BACKTRACK();
562 }
563
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800564 dst_hold(&rt->u.dst);
565 read_unlock_bh(&rt6_lock);
566
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
568 struct rt6_info *nrt;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800569 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800571 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
573 dst_release(&rt->u.dst);
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800574 rt = nrt ? : &ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800576 dst_hold(&rt->u.dst);
577 if (nrt) {
578 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
579 if (!err)
580 goto out2;
581 }
582
583 if (--attempts <= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 goto out2;
585
586 /* Race condition! In the gap, when rt6_lock was
587 released someone could insert this route. Relookup.
588 */
589 dst_release(&rt->u.dst);
590 goto relookup;
591 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800592
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593out2:
594 rt->u.dst.lastuse = jiffies;
595 rt->u.dst.__use++;
596 return &rt->u.dst;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800597out:
598 dst_hold(&rt->u.dst);
599 read_unlock_bh(&rt6_lock);
600 goto out2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601}
602
603
604/*
605 * Destination cache support functions
606 */
607
608static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
609{
610 struct rt6_info *rt;
611
612 rt = (struct rt6_info *) dst;
613
614 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
615 return dst;
616
617 return NULL;
618}
619
620static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
621{
622 struct rt6_info *rt = (struct rt6_info *) dst;
623
624 if (rt) {
625 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700626 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 else
628 dst_release(dst);
629 }
630 return NULL;
631}
632
633static void ip6_link_failure(struct sk_buff *skb)
634{
635 struct rt6_info *rt;
636
637 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
638
639 rt = (struct rt6_info *) skb->dst;
640 if (rt) {
641 if (rt->rt6i_flags&RTF_CACHE) {
642 dst_set_expires(&rt->u.dst, 0);
643 rt->rt6i_flags |= RTF_EXPIRES;
644 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
645 rt->rt6i_node->fn_sernum = -1;
646 }
647}
648
649static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
650{
651 struct rt6_info *rt6 = (struct rt6_info*)dst;
652
653 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
654 rt6->rt6i_flags |= RTF_MODIFIED;
655 if (mtu < IPV6_MIN_MTU) {
656 mtu = IPV6_MIN_MTU;
657 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
658 }
659 dst->metrics[RTAX_MTU-1] = mtu;
660 }
661}
662
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

static int ipv6_get_mtu(struct net_device *dev);
666
667static inline unsigned int ipv6_advmss(unsigned int mtu)
668{
669 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
670
671 if (mtu < ip6_rt_min_advmss)
672 mtu = ip6_rt_min_advmss;
673
674 /*
675 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
676 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
677 * IPV6_MAXPLEN is also valid and means: "any MSS,
678 * rely only on pmtu discovery"
679 */
680 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
681 mtu = IPV6_MAXPLEN;
682 return mtu;
683}
684
685struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
686 struct neighbour *neigh,
687 struct in6_addr *addr,
688 int (*output)(struct sk_buff *))
689{
690 struct rt6_info *rt;
691 struct inet6_dev *idev = in6_dev_get(dev);
692
693 if (unlikely(idev == NULL))
694 return NULL;
695
696 rt = ip6_dst_alloc();
697 if (unlikely(rt == NULL)) {
698 in6_dev_put(idev);
699 goto out;
700 }
701
702 dev_hold(dev);
703 if (neigh)
704 neigh_hold(neigh);
705 else
706 neigh = ndisc_get_neigh(dev, addr);
707
708 rt->rt6i_dev = dev;
709 rt->rt6i_idev = idev;
710 rt->rt6i_nexthop = neigh;
711 atomic_set(&rt->u.dst.__refcnt, 1);
712 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
713 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
714 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
715 rt->u.dst.output = output;
716
717#if 0 /* there's no chance to use these for ndisc */
718 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
719 ? DST_HOST
720 : 0;
721 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
722 rt->rt6i_dst.plen = 128;
723#endif
724
725 write_lock_bh(&rt6_lock);
726 rt->u.dst.next = ndisc_dst_gc_list;
727 ndisc_dst_gc_list = &rt->u.dst;
728 write_unlock_bh(&rt6_lock);
729
730 fib6_force_start_gc();
731
732out:
733 return (struct dst_entry *)rt;
734}
735
736int ndisc_dst_gc(int *more)
737{
738 struct dst_entry *dst, *next, **pprev;
739 int freed;
740
741 next = NULL;
742 pprev = &ndisc_dst_gc_list;
743 freed = 0;
744 while ((dst = *pprev) != NULL) {
745 if (!atomic_read(&dst->__refcnt)) {
746 *pprev = dst->next;
747 dst_free(dst);
748 freed++;
749 } else {
750 pprev = &dst->next;
751 (*more)++;
752 }
753 }
754
755 return freed;
756}
757
758static int ip6_dst_gc(void)
759{
760 static unsigned expire = 30*HZ;
761 static unsigned long last_gc;
762 unsigned long now = jiffies;
763
764 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
765 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
766 goto out;
767
768 expire++;
769 fib6_run_gc(expire);
770 last_gc = now;
771 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
772 expire = ip6_rt_gc_timeout>>1;
773
774out:
775 expire -= expire>>ip6_rt_gc_elasticity;
776 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
777}
778
779/* Clean host part of a prefix. Not necessary in radix tree,
780 but results in cleaner routing tables.
781
782 Remove it only when all the things will work!
783 */
784
785static int ipv6_get_mtu(struct net_device *dev)
786{
787 int mtu = IPV6_MIN_MTU;
788 struct inet6_dev *idev;
789
790 idev = in6_dev_get(dev);
791 if (idev) {
792 mtu = idev->cnf.mtu6;
793 in6_dev_put(idev);
794 }
795 return mtu;
796}
797
798int ipv6_get_hoplimit(struct net_device *dev)
799{
800 int hoplimit = ipv6_devconf.hop_limit;
801 struct inet6_dev *idev;
802
803 idev = in6_dev_get(dev);
804 if (idev) {
805 hoplimit = idev->cnf.hop_limit;
806 in6_dev_put(idev);
807 }
808 return hoplimit;
809}
810
811/*
812 *
813 */
814
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700815int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
816 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817{
818 int err;
819 struct rtmsg *r;
820 struct rtattr **rta;
821 struct rt6_info *rt = NULL;
822 struct net_device *dev = NULL;
823 struct inet6_dev *idev = NULL;
824 int addr_type;
825
826 rta = (struct rtattr **) _rtattr;
827
828 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
829 return -EINVAL;
830#ifndef CONFIG_IPV6_SUBTREES
831 if (rtmsg->rtmsg_src_len)
832 return -EINVAL;
833#endif
834 if (rtmsg->rtmsg_ifindex) {
835 err = -ENODEV;
836 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
837 if (!dev)
838 goto out;
839 idev = in6_dev_get(dev);
840 if (!idev)
841 goto out;
842 }
843
844 if (rtmsg->rtmsg_metric == 0)
845 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
846
847 rt = ip6_dst_alloc();
848
849 if (rt == NULL) {
850 err = -ENOMEM;
851 goto out;
852 }
853
854 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800855 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 if (nlh && (r = NLMSG_DATA(nlh))) {
857 rt->rt6i_protocol = r->rtm_protocol;
858 } else {
859 rt->rt6i_protocol = RTPROT_BOOT;
860 }
861
862 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
863
864 if (addr_type & IPV6_ADDR_MULTICAST)
865 rt->u.dst.input = ip6_mc_input;
866 else
867 rt->u.dst.input = ip6_forward;
868
869 rt->u.dst.output = ip6_output;
870
871 ipv6_addr_prefix(&rt->rt6i_dst.addr,
872 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
873 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
874 if (rt->rt6i_dst.plen == 128)
875 rt->u.dst.flags = DST_HOST;
876
877#ifdef CONFIG_IPV6_SUBTREES
878 ipv6_addr_prefix(&rt->rt6i_src.addr,
879 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
880 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
881#endif
882
883 rt->rt6i_metric = rtmsg->rtmsg_metric;
884
885 /* We cannot add true routes via loopback here,
886 they would result in kernel looping; promote them to reject routes
887 */
888 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
889 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
890 /* hold loopback dev/idev if we haven't done so. */
891 if (dev != &loopback_dev) {
892 if (dev) {
893 dev_put(dev);
894 in6_dev_put(idev);
895 }
896 dev = &loopback_dev;
897 dev_hold(dev);
898 idev = in6_dev_get(dev);
899 if (!idev) {
900 err = -ENODEV;
901 goto out;
902 }
903 }
904 rt->u.dst.output = ip6_pkt_discard_out;
905 rt->u.dst.input = ip6_pkt_discard;
906 rt->u.dst.error = -ENETUNREACH;
907 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
908 goto install_route;
909 }
910
911 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
912 struct in6_addr *gw_addr;
913 int gwa_type;
914
915 gw_addr = &rtmsg->rtmsg_gateway;
916 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
917 gwa_type = ipv6_addr_type(gw_addr);
918
919 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
920 struct rt6_info *grt;
921
922 /* IPv6 strictly inhibits using not link-local
923 addresses as nexthop address.
924 Otherwise, router will not able to send redirects.
925 It is very good, but in some (rare!) circumstances
926 (SIT, PtP, NBMA NOARP links) it is handy to allow
927 some exceptions. --ANK
928 */
929 err = -EINVAL;
930 if (!(gwa_type&IPV6_ADDR_UNICAST))
931 goto out;
932
933 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
934
935 err = -EHOSTUNREACH;
936 if (grt == NULL)
937 goto out;
938 if (dev) {
939 if (dev != grt->rt6i_dev) {
940 dst_release(&grt->u.dst);
941 goto out;
942 }
943 } else {
944 dev = grt->rt6i_dev;
945 idev = grt->rt6i_idev;
946 dev_hold(dev);
947 in6_dev_hold(grt->rt6i_idev);
948 }
949 if (!(grt->rt6i_flags&RTF_GATEWAY))
950 err = 0;
951 dst_release(&grt->u.dst);
952
953 if (err)
954 goto out;
955 }
956 err = -EINVAL;
957 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
958 goto out;
959 }
960
961 err = -ENODEV;
962 if (dev == NULL)
963 goto out;
964
965 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
966 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
967 if (IS_ERR(rt->rt6i_nexthop)) {
968 err = PTR_ERR(rt->rt6i_nexthop);
969 rt->rt6i_nexthop = NULL;
970 goto out;
971 }
972 }
973
974 rt->rt6i_flags = rtmsg->rtmsg_flags;
975
976install_route:
977 if (rta && rta[RTA_METRICS-1]) {
978 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
979 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
980
981 while (RTA_OK(attr, attrlen)) {
982 unsigned flavor = attr->rta_type;
983 if (flavor) {
984 if (flavor > RTAX_MAX) {
985 err = -EINVAL;
986 goto out;
987 }
988 rt->u.dst.metrics[flavor-1] =
989 *(u32 *)RTA_DATA(attr);
990 }
991 attr = RTA_NEXT(attr, attrlen);
992 }
993 }
994
995 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
996 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
997 if (!rt->u.dst.metrics[RTAX_MTU-1])
998 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
999 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1000 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1001 rt->u.dst.dev = dev;
1002 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001003 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004
1005out:
1006 if (dev)
1007 dev_put(dev);
1008 if (idev)
1009 in6_dev_put(idev);
1010 if (rt)
1011 dst_free((struct dst_entry *) rt);
1012 return err;
1013}
1014
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001015int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016{
1017 int err;
1018
1019 write_lock_bh(&rt6_lock);
1020
1021 rt6_reset_dflt_pointer(NULL);
1022
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001023 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 dst_release(&rt->u.dst);
1025
1026 write_unlock_bh(&rt6_lock);
1027
1028 return err;
1029}
1030
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001031static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032{
1033 struct fib6_node *fn;
1034 struct rt6_info *rt;
1035 int err = -ESRCH;
1036
1037 read_lock_bh(&rt6_lock);
1038
1039 fn = fib6_locate(&ip6_routing_table,
1040 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1041 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1042
1043 if (fn) {
1044 for (rt = fn->leaf; rt; rt = rt->u.next) {
1045 if (rtmsg->rtmsg_ifindex &&
1046 (rt->rt6i_dev == NULL ||
1047 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1048 continue;
1049 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1050 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1051 continue;
1052 if (rtmsg->rtmsg_metric &&
1053 rtmsg->rtmsg_metric != rt->rt6i_metric)
1054 continue;
1055 dst_hold(&rt->u.dst);
1056 read_unlock_bh(&rt6_lock);
1057
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001058 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 }
1060 }
1061 read_unlock_bh(&rt6_lock);
1062
1063 return err;
1064}
1065
1066/*
1067 * Handle redirects
1068 */
1069void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1070 struct neighbour *neigh, u8 *lladdr, int on_link)
1071{
1072 struct rt6_info *rt, *nrt;
1073
1074 /* Locate old route to this destination. */
1075 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1076
1077 if (rt == NULL)
1078 return;
1079
1080 if (neigh->dev != rt->rt6i_dev)
1081 goto out;
1082
1083 /*
1084 * Current route is on-link; redirect is always invalid.
1085 *
1086 * Seems, previous statement is not true. It could
1087 * be node, which looks for us as on-link (f.e. proxy ndisc)
1088 * But then router serving it might decide, that we should
1089 * know truth 8)8) --ANK (980726).
1090 */
1091 if (!(rt->rt6i_flags&RTF_GATEWAY))
1092 goto out;
1093
1094 /*
1095 * RFC 2461 specifies that redirects should only be
1096 * accepted if they come from the nexthop to the target.
1097 * Due to the way default routers are chosen, this notion
1098 * is a bit fuzzy and one might need to check all default
1099 * routers.
1100 */
1101 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1102 if (rt->rt6i_flags & RTF_DEFAULT) {
1103 struct rt6_info *rt1;
1104
1105 read_lock(&rt6_lock);
1106 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1107 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1108 dst_hold(&rt1->u.dst);
1109 dst_release(&rt->u.dst);
1110 read_unlock(&rt6_lock);
1111 rt = rt1;
1112 goto source_ok;
1113 }
1114 }
1115 read_unlock(&rt6_lock);
1116 }
1117 if (net_ratelimit())
1118 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1119 "for redirect target\n");
1120 goto out;
1121 }
1122
1123source_ok:
1124
1125 /*
1126 * We have finally decided to accept it.
1127 */
1128
1129 neigh_update(neigh, lladdr, NUD_STALE,
1130 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1131 NEIGH_UPDATE_F_OVERRIDE|
1132 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1133 NEIGH_UPDATE_F_ISROUTER))
1134 );
1135
1136 /*
1137 * Redirect received -> path was valid.
1138 * Look, redirects are sent only in response to data packets,
1139 * so that this nexthop apparently is reachable. --ANK
1140 */
1141 dst_confirm(&rt->u.dst);
1142
1143 /* Duplicate redirect: silently ignore. */
1144 if (neigh == rt->u.dst.neighbour)
1145 goto out;
1146
1147 nrt = ip6_rt_copy(rt);
1148 if (nrt == NULL)
1149 goto out;
1150
1151 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1152 if (on_link)
1153 nrt->rt6i_flags &= ~RTF_GATEWAY;
1154
1155 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1156 nrt->rt6i_dst.plen = 128;
1157 nrt->u.dst.flags |= DST_HOST;
1158
1159 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1160 nrt->rt6i_nexthop = neigh_clone(neigh);
1161 /* Reset pmtu, it may be better */
1162 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1163 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1164
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001165 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 goto out;
1167
1168 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001169 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 return;
1171 }
1172
1173out:
1174 dst_release(&rt->u.dst);
1175 return;
1176}
1177
1178/*
1179 * Handle ICMP "packet too big" messages
1180 * i.e. Path MTU discovery
1181 */
1182
1183void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1184 struct net_device *dev, u32 pmtu)
1185{
1186 struct rt6_info *rt, *nrt;
1187 int allfrag = 0;
1188
1189 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1190 if (rt == NULL)
1191 return;
1192
1193 if (pmtu >= dst_mtu(&rt->u.dst))
1194 goto out;
1195
1196 if (pmtu < IPV6_MIN_MTU) {
1197 /*
1198 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1199 * MTU (1280) and a fragment header should always be included
1200 * after a node receiving Too Big message reporting PMTU is
1201 * less than the IPv6 Minimum Link MTU.
1202 */
1203 pmtu = IPV6_MIN_MTU;
1204 allfrag = 1;
1205 }
1206
1207 /* New mtu received -> path was valid.
1208 They are sent only in response to data packets,
1209 so that this nexthop apparently is reachable. --ANK
1210 */
1211 dst_confirm(&rt->u.dst);
1212
1213 /* Host route. If it is static, it would be better
1214 not to override it, but add new one, so that
1215 when cache entry will expire old pmtu
1216 would return automatically.
1217 */
1218 if (rt->rt6i_flags & RTF_CACHE) {
1219 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1220 if (allfrag)
1221 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1222 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1223 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1224 goto out;
1225 }
1226
1227 /* Network route.
1228 Two cases are possible:
1229 1. It is connected route. Action: COW
1230 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1231 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001232 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001233 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001234 else
1235 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001236
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001237 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001238 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1239 if (allfrag)
1240 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1241
1242 /* According to RFC 1981, detecting PMTU increase shouldn't be
1243 * happened within 5 mins, the recommended timer is 10 mins.
1244 * Here this route expiration time is set to ip6_rt_mtu_expires
1245 * which is 10 mins. After 10 mins the decreased pmtu is expired
1246 * and detecting PMTU increase will be automatically happened.
1247 */
1248 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1249 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1250
1251 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253out:
1254 dst_release(&rt->u.dst);
1255}
1256
1257/*
1258 * Misc support functions
1259 */
1260
1261static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1262{
1263 struct rt6_info *rt = ip6_dst_alloc();
1264
1265 if (rt) {
1266 rt->u.dst.input = ort->u.dst.input;
1267 rt->u.dst.output = ort->u.dst.output;
1268
1269 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1270 rt->u.dst.dev = ort->u.dst.dev;
1271 if (rt->u.dst.dev)
1272 dev_hold(rt->u.dst.dev);
1273 rt->rt6i_idev = ort->rt6i_idev;
1274 if (rt->rt6i_idev)
1275 in6_dev_hold(rt->rt6i_idev);
1276 rt->u.dst.lastuse = jiffies;
1277 rt->rt6i_expires = 0;
1278
1279 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1280 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1281 rt->rt6i_metric = 0;
1282
1283 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1284#ifdef CONFIG_IPV6_SUBTREES
1285 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1286#endif
1287 }
1288 return rt;
1289}
1290
1291struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1292{
1293 struct rt6_info *rt;
1294 struct fib6_node *fn;
1295
1296 fn = &ip6_routing_table;
1297
1298 write_lock_bh(&rt6_lock);
1299 for (rt = fn->leaf; rt; rt=rt->u.next) {
1300 if (dev == rt->rt6i_dev &&
1301 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1302 break;
1303 }
1304 if (rt)
1305 dst_hold(&rt->u.dst);
1306 write_unlock_bh(&rt6_lock);
1307 return rt;
1308}
1309
1310struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1311 struct net_device *dev)
1312{
1313 struct in6_rtmsg rtmsg;
1314
1315 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1316 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1317 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1318 rtmsg.rtmsg_metric = 1024;
1319 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1320
1321 rtmsg.rtmsg_ifindex = dev->ifindex;
1322
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001323 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 return rt6_get_dflt_router(gwaddr, dev);
1325}
1326
1327void rt6_purge_dflt_routers(void)
1328{
1329 struct rt6_info *rt;
1330
1331restart:
1332 read_lock_bh(&rt6_lock);
1333 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1334 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1335 dst_hold(&rt->u.dst);
1336
1337 rt6_reset_dflt_pointer(NULL);
1338
1339 read_unlock_bh(&rt6_lock);
1340
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001341 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342
1343 goto restart;
1344 }
1345 }
1346 read_unlock_bh(&rt6_lock);
1347}
1348
1349int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1350{
1351 struct in6_rtmsg rtmsg;
1352 int err;
1353
1354 switch(cmd) {
1355 case SIOCADDRT: /* Add a route */
1356 case SIOCDELRT: /* Delete a route */
1357 if (!capable(CAP_NET_ADMIN))
1358 return -EPERM;
1359 err = copy_from_user(&rtmsg, arg,
1360 sizeof(struct in6_rtmsg));
1361 if (err)
1362 return -EFAULT;
1363
1364 rtnl_lock();
1365 switch (cmd) {
1366 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001367 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 break;
1369 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001370 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 break;
1372 default:
1373 err = -EINVAL;
1374 }
1375 rtnl_unlock();
1376
1377 return err;
1378 };
1379
1380 return -EINVAL;
1381}
1382
1383/*
1384 * Drop the packet on the floor
1385 */
1386
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001387static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388{
1389 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1390 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1391 kfree_skb(skb);
1392 return 0;
1393}
1394
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001395static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396{
1397 skb->dev = skb->dst->dev;
1398 return ip6_pkt_discard(skb);
1399}
1400
1401/*
1402 * Allocate a dst for local (unicast / anycast) address.
1403 */
1404
1405struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1406 const struct in6_addr *addr,
1407 int anycast)
1408{
1409 struct rt6_info *rt = ip6_dst_alloc();
1410
1411 if (rt == NULL)
1412 return ERR_PTR(-ENOMEM);
1413
1414 dev_hold(&loopback_dev);
1415 in6_dev_hold(idev);
1416
1417 rt->u.dst.flags = DST_HOST;
1418 rt->u.dst.input = ip6_input;
1419 rt->u.dst.output = ip6_output;
1420 rt->rt6i_dev = &loopback_dev;
1421 rt->rt6i_idev = idev;
1422 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1423 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1424 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1425 rt->u.dst.obsolete = -1;
1426
1427 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001428 if (anycast)
1429 rt->rt6i_flags |= RTF_ANYCAST;
1430 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 rt->rt6i_flags |= RTF_LOCAL;
1432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1433 if (rt->rt6i_nexthop == NULL) {
1434 dst_free((struct dst_entry *) rt);
1435 return ERR_PTR(-ENOMEM);
1436 }
1437
1438 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1439 rt->rt6i_dst.plen = 128;
1440
1441 atomic_set(&rt->u.dst.__refcnt, 1);
1442
1443 return rt;
1444}
1445
1446static int fib6_ifdown(struct rt6_info *rt, void *arg)
1447{
1448 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1449 rt != &ip6_null_entry) {
1450 RT6_TRACE("deleted by ifdown %p\n", rt);
1451 return -1;
1452 }
1453 return 0;
1454}
1455
1456void rt6_ifdown(struct net_device *dev)
1457{
1458 write_lock_bh(&rt6_lock);
1459 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1460 write_unlock_bh(&rt6_lock);
1461}
1462
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1468
1469static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1470{
1471 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1472 struct inet6_dev *idev;
1473
1474 /* In IPv6 pmtu discovery is not optional,
1475 so that RTAX_MTU lock cannot disable it.
1476 We still use this lock to block changes
1477 caused by addrconf/ndisc.
1478 */
1479
1480 idev = __in6_dev_get(arg->dev);
1481 if (idev == NULL)
1482 return 0;
1483
1484 /* For administrative MTU increase, there is no way to discover
1485 IPv6 PMTU increase, so PMTU increase should be updated here.
1486 Since RFC 1981 doesn't include administrative MTU increase
1487 update PMTU increase is a MUST. (i.e. jumbo frame)
1488 */
1489 /*
1490 If new MTU is less than route PMTU, this new MTU will be the
1491 lowest MTU in the path, update the route PMTU to reflect PMTU
1492 decreases; if new MTU is greater than route PMTU, and the
1493 old MTU is the lowest MTU in the path, update the route PMTU
1494 to reflect the increase. In this case if the other nodes' MTU
1495 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1496 PMTU discouvery.
1497 */
1498 if (rt->rt6i_dev == arg->dev &&
1499 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1500 (dst_mtu(&rt->u.dst) > arg->mtu ||
1501 (dst_mtu(&rt->u.dst) < arg->mtu &&
1502 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1503 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1504 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1505 return 0;
1506}
1507
1508void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1509{
1510 struct rt6_mtu_change_arg arg;
1511
1512 arg.dev = dev;
1513 arg.mtu = mtu;
1514 read_lock_bh(&rt6_lock);
1515 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1516 read_unlock_bh(&rt6_lock);
1517}
1518
1519static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1520 struct in6_rtmsg *rtmsg)
1521{
1522 memset(rtmsg, 0, sizeof(*rtmsg));
1523
1524 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1525 rtmsg->rtmsg_src_len = r->rtm_src_len;
1526 rtmsg->rtmsg_flags = RTF_UP;
1527 if (r->rtm_type == RTN_UNREACHABLE)
1528 rtmsg->rtmsg_flags |= RTF_REJECT;
1529
1530 if (rta[RTA_GATEWAY-1]) {
1531 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1532 return -EINVAL;
1533 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1534 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1535 }
1536 if (rta[RTA_DST-1]) {
1537 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1538 return -EINVAL;
1539 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1540 }
1541 if (rta[RTA_SRC-1]) {
1542 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1543 return -EINVAL;
1544 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1545 }
1546 if (rta[RTA_OIF-1]) {
1547 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1548 return -EINVAL;
1549 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1550 }
1551 if (rta[RTA_PRIORITY-1]) {
1552 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1553 return -EINVAL;
1554 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1555 }
1556 return 0;
1557}
1558
1559int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1560{
1561 struct rtmsg *r = NLMSG_DATA(nlh);
1562 struct in6_rtmsg rtmsg;
1563
1564 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1565 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001566 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567}
1568
1569int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1570{
1571 struct rtmsg *r = NLMSG_DATA(nlh);
1572 struct in6_rtmsg rtmsg;
1573
1574 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1575 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001576 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577}
1578
/* Per-call state passed through the tree walker to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback carrying the request */
};
1584
1585static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001586 struct in6_addr *dst, struct in6_addr *src,
1587 int iif, int type, u32 pid, u32 seq,
1588 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589{
1590 struct rtmsg *rtm;
1591 struct nlmsghdr *nlh;
1592 unsigned char *b = skb->tail;
1593 struct rta_cacheinfo ci;
1594
1595 if (prefix) { /* user wants prefix routes only */
1596 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1597 /* success since this is not a prefix route */
1598 return 1;
1599 }
1600 }
1601
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001602 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 rtm = NLMSG_DATA(nlh);
1604 rtm->rtm_family = AF_INET6;
1605 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1606 rtm->rtm_src_len = rt->rt6i_src.plen;
1607 rtm->rtm_tos = 0;
1608 rtm->rtm_table = RT_TABLE_MAIN;
1609 if (rt->rt6i_flags&RTF_REJECT)
1610 rtm->rtm_type = RTN_UNREACHABLE;
1611 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1612 rtm->rtm_type = RTN_LOCAL;
1613 else
1614 rtm->rtm_type = RTN_UNICAST;
1615 rtm->rtm_flags = 0;
1616 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1617 rtm->rtm_protocol = rt->rt6i_protocol;
1618 if (rt->rt6i_flags&RTF_DYNAMIC)
1619 rtm->rtm_protocol = RTPROT_REDIRECT;
1620 else if (rt->rt6i_flags & RTF_ADDRCONF)
1621 rtm->rtm_protocol = RTPROT_KERNEL;
1622 else if (rt->rt6i_flags&RTF_DEFAULT)
1623 rtm->rtm_protocol = RTPROT_RA;
1624
1625 if (rt->rt6i_flags&RTF_CACHE)
1626 rtm->rtm_flags |= RTM_F_CLONED;
1627
1628 if (dst) {
1629 RTA_PUT(skb, RTA_DST, 16, dst);
1630 rtm->rtm_dst_len = 128;
1631 } else if (rtm->rtm_dst_len)
1632 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1633#ifdef CONFIG_IPV6_SUBTREES
1634 if (src) {
1635 RTA_PUT(skb, RTA_SRC, 16, src);
1636 rtm->rtm_src_len = 128;
1637 } else if (rtm->rtm_src_len)
1638 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1639#endif
1640 if (iif)
1641 RTA_PUT(skb, RTA_IIF, 4, &iif);
1642 else if (dst) {
1643 struct in6_addr saddr_buf;
1644 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1645 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1646 }
1647 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1648 goto rtattr_failure;
1649 if (rt->u.dst.neighbour)
1650 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1651 if (rt->u.dst.dev)
1652 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1653 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1654 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1655 if (rt->rt6i_expires)
1656 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1657 else
1658 ci.rta_expires = 0;
1659 ci.rta_used = rt->u.dst.__use;
1660 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1661 ci.rta_error = rt->u.dst.error;
1662 ci.rta_id = 0;
1663 ci.rta_ts = 0;
1664 ci.rta_tsage = 0;
1665 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1666 nlh->nlmsg_len = skb->tail - b;
1667 return skb->len;
1668
1669nlmsg_failure:
1670rtattr_failure:
1671 skb_trim(skb, b - skb->data);
1672 return -1;
1673}
1674
1675static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1676{
1677 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1678 int prefix;
1679
1680 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1681 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1682 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1683 } else
1684 prefix = 0;
1685
1686 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1687 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001688 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689}
1690
1691static int fib6_dump_node(struct fib6_walker_t *w)
1692{
1693 int res;
1694 struct rt6_info *rt;
1695
1696 for (rt = w->leaf; rt; rt = rt->u.next) {
1697 res = rt6_dump_route(rt, w->args);
1698 if (res < 0) {
1699 /* Frame is full, suspend walking */
1700 w->leaf = rt;
1701 return 1;
1702 }
1703 BUG_TRAP(res!=0);
1704 }
1705 w->leaf = NULL;
1706 return 0;
1707}
1708
1709static void fib6_dump_end(struct netlink_callback *cb)
1710{
1711 struct fib6_walker_t *w = (void*)cb->args[0];
1712
1713 if (w) {
1714 cb->args[0] = 0;
1715 fib6_walker_unlink(w);
1716 kfree(w);
1717 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001718 cb->done = (void*)cb->args[1];
1719 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720}
1721
1722static int fib6_dump_done(struct netlink_callback *cb)
1723{
1724 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001725 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726}
1727
1728int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1729{
1730 struct rt6_rtnl_dump_arg arg;
1731 struct fib6_walker_t *w;
1732 int res;
1733
1734 arg.skb = skb;
1735 arg.cb = cb;
1736
1737 w = (void*)cb->args[0];
1738 if (w == NULL) {
1739 /* New dump:
1740 *
1741 * 1. hook callback destructor.
1742 */
1743 cb->args[1] = (long)cb->done;
1744 cb->done = fib6_dump_done;
1745
1746 /*
1747 * 2. allocate and initialize walker.
1748 */
David S. Miller9e147a12005-11-17 16:52:51 -08001749 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 if (w == NULL)
1751 return -ENOMEM;
1752 RT6_TRACE("dump<%p", w);
1753 memset(w, 0, sizeof(*w));
1754 w->root = &ip6_routing_table;
1755 w->func = fib6_dump_node;
1756 w->args = &arg;
1757 cb->args[0] = (long)w;
1758 read_lock_bh(&rt6_lock);
1759 res = fib6_walk(w);
1760 read_unlock_bh(&rt6_lock);
1761 } else {
1762 w->args = &arg;
1763 read_lock_bh(&rt6_lock);
1764 res = fib6_walk_continue(w);
1765 read_unlock_bh(&rt6_lock);
1766 }
1767#if RT6_DEBUG >= 3
1768 if (res <= 0 && skb->len == 0)
1769 RT6_TRACE("%p>dump end\n", w);
1770#endif
1771 res = res < 0 ? res : skb->len;
1772 /* res < 0 is an error. (really, impossible)
1773 res == 0 means that dump is complete, but skb still can contain data.
1774 res > 0 dump is not complete, but frame is full.
1775 */
1776 /* Destroy walker, if dump of this table is complete. */
1777 if (res <= 0)
1778 fib6_dump_end(cb);
1779 return res;
1780}
1781
1782int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1783{
1784 struct rtattr **rta = arg;
1785 int iif = 0;
1786 int err = -ENOBUFS;
1787 struct sk_buff *skb;
1788 struct flowi fl;
1789 struct rt6_info *rt;
1790
1791 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1792 if (skb == NULL)
1793 goto out;
1794
1795 /* Reserve room for dummy headers, this skb can pass
1796 through good chunk of routing engine.
1797 */
1798 skb->mac.raw = skb->data;
1799 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1800
1801 memset(&fl, 0, sizeof(fl));
1802 if (rta[RTA_SRC-1])
1803 ipv6_addr_copy(&fl.fl6_src,
1804 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1805 if (rta[RTA_DST-1])
1806 ipv6_addr_copy(&fl.fl6_dst,
1807 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1808
1809 if (rta[RTA_IIF-1])
1810 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1811
1812 if (iif) {
1813 struct net_device *dev;
1814 dev = __dev_get_by_index(iif);
1815 if (!dev) {
1816 err = -ENODEV;
1817 goto out_free;
1818 }
1819 }
1820
1821 fl.oif = 0;
1822 if (rta[RTA_OIF-1])
1823 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1824
1825 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1826
1827 skb->dst = &rt->u.dst;
1828
1829 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1830 err = rt6_fill_node(skb, rt,
1831 &fl.fl6_dst, &fl.fl6_src,
1832 iif,
1833 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001834 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 if (err < 0) {
1836 err = -EMSGSIZE;
1837 goto out_free;
1838 }
1839
1840 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1841 if (err > 0)
1842 err = 0;
1843out:
1844 return err;
1845out_free:
1846 kfree_skb(skb);
1847 goto out;
1848}
1849
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001850void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1851 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852{
1853 struct sk_buff *skb;
1854 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001855 u32 pid = current->pid;
1856 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001858 if (req)
1859 pid = req->pid;
1860 if (nlh)
1861 seq = nlh->nlmsg_seq;
1862
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 skb = alloc_skb(size, gfp_any());
1864 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001865 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 return;
1867 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001868 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001870 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 return;
1872 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001873 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1874 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875}
1876
1877/*
1878 * /proc
1879 */
1880
1881#ifdef CONFIG_PROC_FS
1882
/* Record length used by rt6_info_route() / rt6_proc_info() for offsets. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared by rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1893
1894static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1895{
1896 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1897 int i;
1898
1899 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1900 arg->skip++;
1901 return 0;
1902 }
1903
1904 if (arg->len >= arg->length)
1905 return 0;
1906
1907 for (i=0; i<16; i++) {
1908 sprintf(arg->buffer + arg->len, "%02x",
1909 rt->rt6i_dst.addr.s6_addr[i]);
1910 arg->len += 2;
1911 }
1912 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913 rt->rt6i_dst.plen);
1914
1915#ifdef CONFIG_IPV6_SUBTREES
1916 for (i=0; i<16; i++) {
1917 sprintf(arg->buffer + arg->len, "%02x",
1918 rt->rt6i_src.addr.s6_addr[i]);
1919 arg->len += 2;
1920 }
1921 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1922 rt->rt6i_src.plen);
1923#else
1924 sprintf(arg->buffer + arg->len,
1925 "00000000000000000000000000000000 00 ");
1926 arg->len += 36;
1927#endif
1928
1929 if (rt->rt6i_nexthop) {
1930 for (i=0; i<16; i++) {
1931 sprintf(arg->buffer + arg->len, "%02x",
1932 rt->rt6i_nexthop->primary_key[i]);
1933 arg->len += 2;
1934 }
1935 } else {
1936 sprintf(arg->buffer + arg->len,
1937 "00000000000000000000000000000000");
1938 arg->len += 32;
1939 }
1940 arg->len += sprintf(arg->buffer + arg->len,
1941 " %08x %08x %08x %08x %8s\n",
1942 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1943 rt->u.dst.__use, rt->rt6i_flags,
1944 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1945 return 0;
1946}
1947
1948static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1949{
1950 struct rt6_proc_arg arg;
1951 arg.buffer = buffer;
1952 arg.offset = offset;
1953 arg.length = length;
1954 arg.skip = 0;
1955 arg.len = 0;
1956
1957 read_lock_bh(&rt6_lock);
1958 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1959 read_unlock_bh(&rt6_lock);
1960
1961 *start = buffer;
1962 if (offset)
1963 *start += offset % RT6_INFO_LEN;
1964
1965 arg.len -= offset % RT6_INFO_LEN;
1966
1967 if (arg.len > length)
1968 arg.len = length;
1969 if (arg.len < 0)
1970 arg.len = 0;
1971
1972 return arg.len;
1973}
1974
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1976{
1977 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1978 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1979 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1980 rt6_stats.fib_rt_cache,
1981 atomic_read(&ip6_dst_ops.entries),
1982 rt6_stats.fib_discarded_routes);
1983
1984 return 0;
1985}
1986
1987static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1988{
1989 return single_open(file, rt6_stats_seq_show, NULL);
1990}
1991
1992static struct file_operations rt6_stats_seq_fops = {
1993 .owner = THIS_MODULE,
1994 .open = rt6_stats_seq_open,
1995 .read = seq_read,
1996 .llseek = seq_lseek,
1997 .release = single_release,
1998};
1999#endif /* CONFIG_PROC_FS */
2000
2001#ifdef CONFIG_SYSCTL
2002
2003static int flush_delay;
2004
2005static
2006int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2007 void __user *buffer, size_t *lenp, loff_t *ppos)
2008{
2009 if (write) {
2010 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2011 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2012 return 0;
2013 } else
2014 return -EINVAL;
2015}
2016
2017ctl_table ipv6_route_table[] = {
2018 {
2019 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2020 .procname = "flush",
2021 .data = &flush_delay,
2022 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002023 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 .proc_handler = &ipv6_sysctl_rtcache_flush
2025 },
2026 {
2027 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2028 .procname = "gc_thresh",
2029 .data = &ip6_dst_ops.gc_thresh,
2030 .maxlen = sizeof(int),
2031 .mode = 0644,
2032 .proc_handler = &proc_dointvec,
2033 },
2034 {
2035 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2036 .procname = "max_size",
2037 .data = &ip6_rt_max_size,
2038 .maxlen = sizeof(int),
2039 .mode = 0644,
2040 .proc_handler = &proc_dointvec,
2041 },
2042 {
2043 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2044 .procname = "gc_min_interval",
2045 .data = &ip6_rt_gc_min_interval,
2046 .maxlen = sizeof(int),
2047 .mode = 0644,
2048 .proc_handler = &proc_dointvec_jiffies,
2049 .strategy = &sysctl_jiffies,
2050 },
2051 {
2052 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2053 .procname = "gc_timeout",
2054 .data = &ip6_rt_gc_timeout,
2055 .maxlen = sizeof(int),
2056 .mode = 0644,
2057 .proc_handler = &proc_dointvec_jiffies,
2058 .strategy = &sysctl_jiffies,
2059 },
2060 {
2061 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2062 .procname = "gc_interval",
2063 .data = &ip6_rt_gc_interval,
2064 .maxlen = sizeof(int),
2065 .mode = 0644,
2066 .proc_handler = &proc_dointvec_jiffies,
2067 .strategy = &sysctl_jiffies,
2068 },
2069 {
2070 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2071 .procname = "gc_elasticity",
2072 .data = &ip6_rt_gc_elasticity,
2073 .maxlen = sizeof(int),
2074 .mode = 0644,
2075 .proc_handler = &proc_dointvec_jiffies,
2076 .strategy = &sysctl_jiffies,
2077 },
2078 {
2079 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2080 .procname = "mtu_expires",
2081 .data = &ip6_rt_mtu_expires,
2082 .maxlen = sizeof(int),
2083 .mode = 0644,
2084 .proc_handler = &proc_dointvec_jiffies,
2085 .strategy = &sysctl_jiffies,
2086 },
2087 {
2088 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2089 .procname = "min_adv_mss",
2090 .data = &ip6_rt_min_advmss,
2091 .maxlen = sizeof(int),
2092 .mode = 0644,
2093 .proc_handler = &proc_dointvec_jiffies,
2094 .strategy = &sysctl_jiffies,
2095 },
2096 {
2097 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2098 .procname = "gc_min_interval_ms",
2099 .data = &ip6_rt_gc_min_interval,
2100 .maxlen = sizeof(int),
2101 .mode = 0644,
2102 .proc_handler = &proc_dointvec_ms_jiffies,
2103 .strategy = &sysctl_ms_jiffies,
2104 },
2105 { .ctl_name = 0 }
2106};
2107
2108#endif
2109
2110void __init ip6_route_init(void)
2111{
2112 struct proc_dir_entry *p;
2113
2114 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2115 sizeof(struct rt6_info),
2116 0, SLAB_HWCACHE_ALIGN,
2117 NULL, NULL);
2118 if (!ip6_dst_ops.kmem_cachep)
2119 panic("cannot create ip6_dst_cache");
2120
2121 fib6_init();
2122#ifdef CONFIG_PROC_FS
2123 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2124 if (p)
2125 p->owner = THIS_MODULE;
2126
2127 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2128#endif
2129#ifdef CONFIG_XFRM
2130 xfrm6_init();
2131#endif
2132}
2133
2134void ip6_route_cleanup(void)
2135{
2136#ifdef CONFIG_PROC_FS
2137 proc_net_remove("ipv6_route");
2138 proc_net_remove("rt6_stats");
2139#endif
2140#ifdef CONFIG_XFRM
2141 xfrm6_fini();
2142#endif
2143 rt6_ifdown(NULL);
2144 fib6_gc_cleanup();
2145 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2146}