blob: 0190e39096b978692349bd0dbd3e5271a828fab3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Raising RT6_DEBUG to 3 (or more) turns
 * RDBG() and RT6_TRACE() into real printk() calls; at any lower level both
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080075#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -080077#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
148/* Protects all the ip6 fib */
149
150DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state, send it a
 * Neighbour Solicitation (to the solicited-node multicast address of the
 * neighbour's key).  Probes are rate-limited: nothing is sent unless the
 * neighbour's "updated" stamp is older than the interface's
 * cnf.rtr_probe_interval, and the stamp is refreshed before sending.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * The stamp is modified here, so neigh->lock is taken for writing; holding
 * only the read side would let concurrent probers race on the update and
 * send duplicate probes.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* unlocked fast path: no neighbour, or it is already known valid */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* became valid meanwhile, or probed too recently */
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* the solicitation itself is sent without holding the lock */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800266 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277}
278
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800279static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
283 if (neigh) {
284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
288 }
289 return m;
290}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800292static int rt6_score_route(struct rt6_info *rt, int oif,
293 int strict)
294{
295 int m = rt6_check_dev(rt, oif);
296 if (!m && (strict & RT6_SELECT_F_IFACE))
297 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800298#ifdef CONFIG_IPV6_ROUTER_PREF
299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
300#endif
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800301 if (rt6_check_neigh(rt))
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800302 m |= 16;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800303 else if (strict & RT6_SELECT_F_REACHABLE)
304 return -1;
305 return m;
306}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800308static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
309 int strict)
310{
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
317 __FUNCTION__, head, head ? *head : NULL, oif);
318
319 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700320 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 rt = rt->u.next) {
322 int m;
323
324 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 continue;
326
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 m = rt6_score_route(rt, oif, strict);
330 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800334 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800337 } else {
338 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 }
340 }
341
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800342 if (!match &&
343 (strict & RT6_SELECT_F_REACHABLE) &&
344 last && last != rt0) {
345 /* no entries matched; do round-robin */
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700346 static spinlock_t lock = SPIN_LOCK_UNLOCKED;
347 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 *head = rt0->u.next;
349 rt0->u.next = last->u.next;
350 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700351 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 }
353
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354 RT6_TRACE("%s() => %p, score=%d\n",
355 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800357 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358}
359
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from a router.
 *
 * @dev:    interface the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * A lifetime of zero deletes a matching route; otherwise the route is
 * created or its preference refreshed, and its expiry is set from the
 * lifetime (0xffffffff means "never expires").
 *
 * Returns 0 on success or -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length: the option length (in
	 * units of 8 octets) must be large enough to hold prefix_len bits.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* the lifetime arrives in network byte order: convert to host order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* full 128-bit prefix is present in the option */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime: withdraw an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* existing route: replace only the preference bits */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
437
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
439 int oif, int strict)
440{
441 struct fib6_node *fn;
442 struct rt6_info *rt;
443
444 read_lock_bh(&rt6_lock);
445 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
446 rt = rt6_device_match(fn->leaf, oif, strict);
447 dst_hold(&rt->u.dst);
448 rt->u.dst.__use++;
449 read_unlock_bh(&rt6_lock);
450
451 rt->u.dst.lastuse = jiffies;
452 if (rt->u.dst.error == 0)
453 return rt;
454 dst_release(&rt->u.dst);
455 return NULL;
456}
457
458/* ip6_ins_rt is called with FREE rt6_lock.
459 It takes new route entry, the addition fails by any reason the
460 route is freed. In any case, if caller does not hold it, it may
461 be destroyed.
462 */
463
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700464int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
465 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466{
467 int err;
468
469 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700470 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 write_unlock_bh(&rt6_lock);
472
473 return err;
474}
475
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800476static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
477 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 struct rt6_info *rt;
480
481 /*
482 * Clone the route.
483 */
484
485 rt = ip6_rt_copy(ort);
486
487 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900488 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
489 if (rt->rt6i_dst.plen != 128 &&
490 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
491 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900493 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900495 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 rt->rt6i_dst.plen = 128;
497 rt->rt6i_flags |= RTF_CACHE;
498 rt->u.dst.flags |= DST_HOST;
499
500#ifdef CONFIG_IPV6_SUBTREES
501 if (rt->rt6i_src.plen && saddr) {
502 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
503 rt->rt6i_src.plen = 128;
504 }
505#endif
506
507 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
508
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800509 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800511 return rt;
512}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800514static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
515{
516 struct rt6_info *rt = ip6_rt_copy(ort);
517 if (rt) {
518 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
519 rt->rt6i_dst.plen = 128;
520 rt->rt6i_flags |= RTF_CACHE;
521 if (rt->rt6i_flags & RTF_REJECT)
522 rt->u.dst.error = ort->u.dst.error;
523 rt->u.dst.flags |= DST_HOST;
524 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
525 }
526 return rt;
527}
528
/*
 * Used after route selection inside a lookup function that has local
 * variables "rt" and "fn" and labels "out" and "restart".  If selection
 * produced the null entry, climb towards the root of the tree: reaching a
 * root node gives up (goto out), while the first ancestor that carries
 * route information retries the selection there (goto restart).
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
	for (fn = fn->parent; fn != NULL; fn = fn->parent) { \
		if (fn->fn_flags & RTN_ROOT) { \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
539
540
541void ip6_route_input(struct sk_buff *skb)
542{
543 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800544 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 int strict;
546 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800547 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800548 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800550 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551
552relookup:
553 read_lock_bh(&rt6_lock);
554
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800555restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
557 &skb->nh.ipv6h->saddr);
558
559restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800560 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800562 if (rt == &ip6_null_entry ||
563 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800564 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800566 dst_hold(&rt->u.dst);
567 read_unlock_bh(&rt6_lock);
568
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800569 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
570 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
571 else {
572#if CLONE_OFFLINK_ROUTE
573 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
574#else
575 goto out2;
576#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800579 dst_release(&rt->u.dst);
580 rt = nrt ? : &ip6_null_entry;
581
582 dst_hold(&rt->u.dst);
583 if (nrt) {
584 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
585 if (!err)
586 goto out2;
587 }
588
589 if (--attempts <= 0)
590 goto out2;
591
592 /*
593 * Race condition! In the gap, when rt6_lock was
594 * released someone could insert this route. Relookup.
595 */
596 dst_release(&rt->u.dst);
597 goto relookup;
598
599out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800600 if (reachable) {
601 reachable = 0;
602 goto restart_2;
603 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800604 dst_hold(&rt->u.dst);
605 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606out2:
607 rt->u.dst.lastuse = jiffies;
608 rt->u.dst.__use++;
609 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800610 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611}
612
613struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
614{
615 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800616 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 int strict;
618 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800619 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800620 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800622 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
624relookup:
625 read_lock_bh(&rt6_lock);
626
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800627restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
629
630restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800631 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800632 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800633 if (rt == &ip6_null_entry ||
634 rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800637 dst_hold(&rt->u.dst);
638 read_unlock_bh(&rt6_lock);
639
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800640 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800641 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800642 else {
643#if CLONE_OFFLINK_ROUTE
644 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
645#else
646 goto out2;
647#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800649
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800650 dst_release(&rt->u.dst);
651 rt = nrt ? : &ip6_null_entry;
652
653 dst_hold(&rt->u.dst);
654 if (nrt) {
655 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
656 if (!err)
657 goto out2;
658 }
659
660 if (--attempts <= 0)
661 goto out2;
662
663 /*
664 * Race condition! In the gap, when rt6_lock was
665 * released someone could insert this route. Relookup.
666 */
667 dst_release(&rt->u.dst);
668 goto relookup;
669
670out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800671 if (reachable) {
672 reachable = 0;
673 goto restart_2;
674 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 dst_hold(&rt->u.dst);
676 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677out2:
678 rt->u.dst.lastuse = jiffies;
679 rt->u.dst.__use++;
680 return &rt->u.dst;
681}
682
683
684/*
685 * Destination cache support functions
686 */
687
688static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
689{
690 struct rt6_info *rt;
691
692 rt = (struct rt6_info *) dst;
693
694 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
695 return dst;
696
697 return NULL;
698}
699
700static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
701{
702 struct rt6_info *rt = (struct rt6_info *) dst;
703
704 if (rt) {
705 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700706 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 else
708 dst_release(dst);
709 }
710 return NULL;
711}
712
713static void ip6_link_failure(struct sk_buff *skb)
714{
715 struct rt6_info *rt;
716
717 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
718
719 rt = (struct rt6_info *) skb->dst;
720 if (rt) {
721 if (rt->rt6i_flags&RTF_CACHE) {
722 dst_set_expires(&rt->u.dst, 0);
723 rt->rt6i_flags |= RTF_EXPIRES;
724 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
725 rt->rt6i_node->fn_sernum = -1;
726 }
727}
728
729static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
730{
731 struct rt6_info *rt6 = (struct rt6_info*)dst;
732
733 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
734 rt6->rt6i_flags |= RTF_MODIFIED;
735 if (mtu < IPV6_MIN_MTU) {
736 mtu = IPV6_MIN_MTU;
737 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
738 }
739 dst->metrics[RTAX_MTU-1] = mtu;
740 }
741}
742
743/* Protected by rt6_lock. */
744static struct dst_entry *ndisc_dst_gc_list;
745static int ipv6_get_mtu(struct net_device *dev);
746
747static inline unsigned int ipv6_advmss(unsigned int mtu)
748{
749 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
750
751 if (mtu < ip6_rt_min_advmss)
752 mtu = ip6_rt_min_advmss;
753
754 /*
755 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
756 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
757 * IPV6_MAXPLEN is also valid and means: "any MSS,
758 * rely only on pmtu discovery"
759 */
760 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
761 mtu = IPV6_MAXPLEN;
762 return mtu;
763}
764
765struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
766 struct neighbour *neigh,
767 struct in6_addr *addr,
768 int (*output)(struct sk_buff *))
769{
770 struct rt6_info *rt;
771 struct inet6_dev *idev = in6_dev_get(dev);
772
773 if (unlikely(idev == NULL))
774 return NULL;
775
776 rt = ip6_dst_alloc();
777 if (unlikely(rt == NULL)) {
778 in6_dev_put(idev);
779 goto out;
780 }
781
782 dev_hold(dev);
783 if (neigh)
784 neigh_hold(neigh);
785 else
786 neigh = ndisc_get_neigh(dev, addr);
787
788 rt->rt6i_dev = dev;
789 rt->rt6i_idev = idev;
790 rt->rt6i_nexthop = neigh;
791 atomic_set(&rt->u.dst.__refcnt, 1);
792 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
793 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
794 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
795 rt->u.dst.output = output;
796
797#if 0 /* there's no chance to use these for ndisc */
798 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
799 ? DST_HOST
800 : 0;
801 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
802 rt->rt6i_dst.plen = 128;
803#endif
804
805 write_lock_bh(&rt6_lock);
806 rt->u.dst.next = ndisc_dst_gc_list;
807 ndisc_dst_gc_list = &rt->u.dst;
808 write_unlock_bh(&rt6_lock);
809
810 fib6_force_start_gc();
811
812out:
813 return (struct dst_entry *)rt;
814}
815
816int ndisc_dst_gc(int *more)
817{
818 struct dst_entry *dst, *next, **pprev;
819 int freed;
820
821 next = NULL;
822 pprev = &ndisc_dst_gc_list;
823 freed = 0;
824 while ((dst = *pprev) != NULL) {
825 if (!atomic_read(&dst->__refcnt)) {
826 *pprev = dst->next;
827 dst_free(dst);
828 freed++;
829 } else {
830 pprev = &dst->next;
831 (*more)++;
832 }
833 }
834
835 return freed;
836}
837
838static int ip6_dst_gc(void)
839{
840 static unsigned expire = 30*HZ;
841 static unsigned long last_gc;
842 unsigned long now = jiffies;
843
844 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
845 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
846 goto out;
847
848 expire++;
849 fib6_run_gc(expire);
850 last_gc = now;
851 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
852 expire = ip6_rt_gc_timeout>>1;
853
854out:
855 expire -= expire>>ip6_rt_gc_elasticity;
856 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
857}
858
859/* Clean host part of a prefix. Not necessary in radix tree,
860 but results in cleaner routing tables.
861
862 Remove it only when all the things will work!
863 */
864
865static int ipv6_get_mtu(struct net_device *dev)
866{
867 int mtu = IPV6_MIN_MTU;
868 struct inet6_dev *idev;
869
870 idev = in6_dev_get(dev);
871 if (idev) {
872 mtu = idev->cnf.mtu6;
873 in6_dev_put(idev);
874 }
875 return mtu;
876}
877
878int ipv6_get_hoplimit(struct net_device *dev)
879{
880 int hoplimit = ipv6_devconf.hop_limit;
881 struct inet6_dev *idev;
882
883 idev = in6_dev_get(dev);
884 if (idev) {
885 hoplimit = idev->cnf.hop_limit;
886 in6_dev_put(idev);
887 }
888 return hoplimit;
889}
890
891/*
892 *
893 */
894
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700895int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
896 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897{
898 int err;
899 struct rtmsg *r;
900 struct rtattr **rta;
901 struct rt6_info *rt = NULL;
902 struct net_device *dev = NULL;
903 struct inet6_dev *idev = NULL;
904 int addr_type;
905
906 rta = (struct rtattr **) _rtattr;
907
908 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
909 return -EINVAL;
910#ifndef CONFIG_IPV6_SUBTREES
911 if (rtmsg->rtmsg_src_len)
912 return -EINVAL;
913#endif
914 if (rtmsg->rtmsg_ifindex) {
915 err = -ENODEV;
916 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
917 if (!dev)
918 goto out;
919 idev = in6_dev_get(dev);
920 if (!idev)
921 goto out;
922 }
923
924 if (rtmsg->rtmsg_metric == 0)
925 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
926
927 rt = ip6_dst_alloc();
928
929 if (rt == NULL) {
930 err = -ENOMEM;
931 goto out;
932 }
933
934 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800935 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 if (nlh && (r = NLMSG_DATA(nlh))) {
937 rt->rt6i_protocol = r->rtm_protocol;
938 } else {
939 rt->rt6i_protocol = RTPROT_BOOT;
940 }
941
942 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
943
944 if (addr_type & IPV6_ADDR_MULTICAST)
945 rt->u.dst.input = ip6_mc_input;
946 else
947 rt->u.dst.input = ip6_forward;
948
949 rt->u.dst.output = ip6_output;
950
951 ipv6_addr_prefix(&rt->rt6i_dst.addr,
952 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
953 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
954 if (rt->rt6i_dst.plen == 128)
955 rt->u.dst.flags = DST_HOST;
956
957#ifdef CONFIG_IPV6_SUBTREES
958 ipv6_addr_prefix(&rt->rt6i_src.addr,
959 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
960 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
961#endif
962
963 rt->rt6i_metric = rtmsg->rtmsg_metric;
964
965 /* We cannot add true routes via loopback here,
966 they would result in kernel looping; promote them to reject routes
967 */
968 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
969 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
970 /* hold loopback dev/idev if we haven't done so. */
971 if (dev != &loopback_dev) {
972 if (dev) {
973 dev_put(dev);
974 in6_dev_put(idev);
975 }
976 dev = &loopback_dev;
977 dev_hold(dev);
978 idev = in6_dev_get(dev);
979 if (!idev) {
980 err = -ENODEV;
981 goto out;
982 }
983 }
984 rt->u.dst.output = ip6_pkt_discard_out;
985 rt->u.dst.input = ip6_pkt_discard;
986 rt->u.dst.error = -ENETUNREACH;
987 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
988 goto install_route;
989 }
990
991 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
992 struct in6_addr *gw_addr;
993 int gwa_type;
994
995 gw_addr = &rtmsg->rtmsg_gateway;
996 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
997 gwa_type = ipv6_addr_type(gw_addr);
998
999 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1000 struct rt6_info *grt;
1001
1002 /* IPv6 strictly inhibits using not link-local
1003 addresses as nexthop address.
1004 Otherwise, router will not able to send redirects.
1005 It is very good, but in some (rare!) circumstances
1006 (SIT, PtP, NBMA NOARP links) it is handy to allow
1007 some exceptions. --ANK
1008 */
1009 err = -EINVAL;
1010 if (!(gwa_type&IPV6_ADDR_UNICAST))
1011 goto out;
1012
1013 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1014
1015 err = -EHOSTUNREACH;
1016 if (grt == NULL)
1017 goto out;
1018 if (dev) {
1019 if (dev != grt->rt6i_dev) {
1020 dst_release(&grt->u.dst);
1021 goto out;
1022 }
1023 } else {
1024 dev = grt->rt6i_dev;
1025 idev = grt->rt6i_idev;
1026 dev_hold(dev);
1027 in6_dev_hold(grt->rt6i_idev);
1028 }
1029 if (!(grt->rt6i_flags&RTF_GATEWAY))
1030 err = 0;
1031 dst_release(&grt->u.dst);
1032
1033 if (err)
1034 goto out;
1035 }
1036 err = -EINVAL;
1037 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1038 goto out;
1039 }
1040
1041 err = -ENODEV;
1042 if (dev == NULL)
1043 goto out;
1044
1045 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1046 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1047 if (IS_ERR(rt->rt6i_nexthop)) {
1048 err = PTR_ERR(rt->rt6i_nexthop);
1049 rt->rt6i_nexthop = NULL;
1050 goto out;
1051 }
1052 }
1053
1054 rt->rt6i_flags = rtmsg->rtmsg_flags;
1055
1056install_route:
1057 if (rta && rta[RTA_METRICS-1]) {
1058 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1059 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1060
1061 while (RTA_OK(attr, attrlen)) {
1062 unsigned flavor = attr->rta_type;
1063 if (flavor) {
1064 if (flavor > RTAX_MAX) {
1065 err = -EINVAL;
1066 goto out;
1067 }
1068 rt->u.dst.metrics[flavor-1] =
1069 *(u32 *)RTA_DATA(attr);
1070 }
1071 attr = RTA_NEXT(attr, attrlen);
1072 }
1073 }
1074
1075 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1076 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1077 if (!rt->u.dst.metrics[RTAX_MTU-1])
1078 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1079 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1080 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1081 rt->u.dst.dev = dev;
1082 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001083 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
1085out:
1086 if (dev)
1087 dev_put(dev);
1088 if (idev)
1089 in6_dev_put(idev);
1090 if (rt)
1091 dst_free((struct dst_entry *) rt);
1092 return err;
1093}
1094
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001095int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096{
1097 int err;
1098
1099 write_lock_bh(&rt6_lock);
1100
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001101 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 dst_release(&rt->u.dst);
1103
1104 write_unlock_bh(&rt6_lock);
1105
1106 return err;
1107}
1108
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001109static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110{
1111 struct fib6_node *fn;
1112 struct rt6_info *rt;
1113 int err = -ESRCH;
1114
1115 read_lock_bh(&rt6_lock);
1116
1117 fn = fib6_locate(&ip6_routing_table,
1118 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1119 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1120
1121 if (fn) {
1122 for (rt = fn->leaf; rt; rt = rt->u.next) {
1123 if (rtmsg->rtmsg_ifindex &&
1124 (rt->rt6i_dev == NULL ||
1125 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1126 continue;
1127 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1128 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1129 continue;
1130 if (rtmsg->rtmsg_metric &&
1131 rtmsg->rtmsg_metric != rt->rt6i_metric)
1132 continue;
1133 dst_hold(&rt->u.dst);
1134 read_unlock_bh(&rt6_lock);
1135
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001136 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 }
1138 }
1139 read_unlock_bh(&rt6_lock);
1140
1141 return err;
1142}
1143
1144/*
1145 * Handle redirects
1146 */
1147void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1148 struct neighbour *neigh, u8 *lladdr, int on_link)
1149{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001150 struct rt6_info *rt, *nrt = NULL;
1151 int strict;
1152 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153
1154 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001155 * Get the "current" route for this destination and
1156 * check if the redirect has come from approriate router.
1157 *
1158 * RFC 2461 specifies that redirects should only be
1159 * accepted if they come from the nexthop to the target.
1160 * Due to the way the routes are chosen, this notion
1161 * is a bit fuzzy and one might need to check all possible
1162 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 */
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001164 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001166 read_lock_bh(&rt6_lock);
1167 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1168restart:
1169 for (rt = fn->leaf; rt; rt = rt->u.next) {
1170 /*
1171 * Current route is on-link; redirect is always invalid.
1172 *
1173 * Seems, previous statement is not true. It could
1174 * be node, which looks for us as on-link (f.e. proxy ndisc)
1175 * But then router serving it might decide, that we should
1176 * know truth 8)8) --ANK (980726).
1177 */
1178 if (rt6_check_expired(rt))
1179 continue;
1180 if (!(rt->rt6i_flags & RTF_GATEWAY))
1181 continue;
1182 if (neigh->dev != rt->rt6i_dev)
1183 continue;
1184 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1185 continue;
1186 break;
1187 }
1188 if (rt)
1189 dst_hold(&rt->u.dst);
1190 else if (strict) {
1191 while ((fn = fn->parent) != NULL) {
1192 if (fn->fn_flags & RTN_ROOT)
1193 break;
1194 if (fn->fn_flags & RTN_RTINFO)
1195 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001197 }
1198 read_unlock_bh(&rt6_lock);
1199
1200 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (net_ratelimit())
1202 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1203 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001204 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 }
1206
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 /*
1208 * We have finally decided to accept it.
1209 */
1210
1211 neigh_update(neigh, lladdr, NUD_STALE,
1212 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1213 NEIGH_UPDATE_F_OVERRIDE|
1214 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1215 NEIGH_UPDATE_F_ISROUTER))
1216 );
1217
1218 /*
1219 * Redirect received -> path was valid.
1220 * Look, redirects are sent only in response to data packets,
1221 * so that this nexthop apparently is reachable. --ANK
1222 */
1223 dst_confirm(&rt->u.dst);
1224
1225 /* Duplicate redirect: silently ignore. */
1226 if (neigh == rt->u.dst.neighbour)
1227 goto out;
1228
1229 nrt = ip6_rt_copy(rt);
1230 if (nrt == NULL)
1231 goto out;
1232
1233 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1234 if (on_link)
1235 nrt->rt6i_flags &= ~RTF_GATEWAY;
1236
1237 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1238 nrt->rt6i_dst.plen = 128;
1239 nrt->u.dst.flags |= DST_HOST;
1240
1241 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1242 nrt->rt6i_nexthop = neigh_clone(neigh);
1243 /* Reset pmtu, it may be better */
1244 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1245 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1246
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001247 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 goto out;
1249
1250 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001251 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 return;
1253 }
1254
1255out:
1256 dst_release(&rt->u.dst);
1257 return;
1258}
1259
1260/*
1261 * Handle ICMP "packet too big" messages
1262 * i.e. Path MTU discovery
1263 */
1264
1265void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1266 struct net_device *dev, u32 pmtu)
1267{
1268 struct rt6_info *rt, *nrt;
1269 int allfrag = 0;
1270
1271 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1272 if (rt == NULL)
1273 return;
1274
1275 if (pmtu >= dst_mtu(&rt->u.dst))
1276 goto out;
1277
1278 if (pmtu < IPV6_MIN_MTU) {
1279 /*
1280 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1281 * MTU (1280) and a fragment header should always be included
1282 * after a node receiving Too Big message reporting PMTU is
1283 * less than the IPv6 Minimum Link MTU.
1284 */
1285 pmtu = IPV6_MIN_MTU;
1286 allfrag = 1;
1287 }
1288
1289 /* New mtu received -> path was valid.
1290 They are sent only in response to data packets,
1291 so that this nexthop apparently is reachable. --ANK
1292 */
1293 dst_confirm(&rt->u.dst);
1294
1295 /* Host route. If it is static, it would be better
1296 not to override it, but add new one, so that
1297 when cache entry will expire old pmtu
1298 would return automatically.
1299 */
1300 if (rt->rt6i_flags & RTF_CACHE) {
1301 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1302 if (allfrag)
1303 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1304 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1305 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1306 goto out;
1307 }
1308
1309 /* Network route.
1310 Two cases are possible:
1311 1. It is connected route. Action: COW
1312 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1313 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001314 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001315 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001316 else
1317 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001318
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001319 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001320 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1321 if (allfrag)
1322 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1323
1324 /* According to RFC 1981, detecting PMTU increase shouldn't be
1325 * happened within 5 mins, the recommended timer is 10 mins.
1326 * Here this route expiration time is set to ip6_rt_mtu_expires
1327 * which is 10 mins. After 10 mins the decreased pmtu is expired
1328 * and detecting PMTU increase will be automatically happened.
1329 */
1330 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1331 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1332
1333 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335out:
1336 dst_release(&rt->u.dst);
1337}
1338
1339/*
1340 * Misc support functions
1341 */
1342
1343static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1344{
1345 struct rt6_info *rt = ip6_dst_alloc();
1346
1347 if (rt) {
1348 rt->u.dst.input = ort->u.dst.input;
1349 rt->u.dst.output = ort->u.dst.output;
1350
1351 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1352 rt->u.dst.dev = ort->u.dst.dev;
1353 if (rt->u.dst.dev)
1354 dev_hold(rt->u.dst.dev);
1355 rt->rt6i_idev = ort->rt6i_idev;
1356 if (rt->rt6i_idev)
1357 in6_dev_hold(rt->rt6i_idev);
1358 rt->u.dst.lastuse = jiffies;
1359 rt->rt6i_expires = 0;
1360
1361 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1362 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1363 rt->rt6i_metric = 0;
1364
1365 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1366#ifdef CONFIG_IPV6_SUBTREES
1367 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1368#endif
1369 }
1370 return rt;
1371}
1372
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001373#ifdef CONFIG_IPV6_ROUTE_INFO
1374static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1375 struct in6_addr *gwaddr, int ifindex)
1376{
1377 struct fib6_node *fn;
1378 struct rt6_info *rt = NULL;
1379
1380 write_lock_bh(&rt6_lock);
1381 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1382 if (!fn)
1383 goto out;
1384
1385 for (rt = fn->leaf; rt; rt = rt->u.next) {
1386 if (rt->rt6i_dev->ifindex != ifindex)
1387 continue;
1388 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1389 continue;
1390 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1391 continue;
1392 dst_hold(&rt->u.dst);
1393 break;
1394 }
1395out:
1396 write_unlock_bh(&rt6_lock);
1397 return rt;
1398}
1399
1400static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1401 struct in6_addr *gwaddr, int ifindex,
1402 unsigned pref)
1403{
1404 struct in6_rtmsg rtmsg;
1405
1406 memset(&rtmsg, 0, sizeof(rtmsg));
1407 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1408 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1409 rtmsg.rtmsg_dst_len = prefixlen;
1410 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1411 rtmsg.rtmsg_metric = 1024;
1412 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001413 /* We should treat it as a default route if prefix length is 0. */
1414 if (!prefixlen)
1415 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001416 rtmsg.rtmsg_ifindex = ifindex;
1417
1418 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1419
1420 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1421}
1422#endif
1423
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1425{
1426 struct rt6_info *rt;
1427 struct fib6_node *fn;
1428
1429 fn = &ip6_routing_table;
1430
1431 write_lock_bh(&rt6_lock);
1432 for (rt = fn->leaf; rt; rt=rt->u.next) {
1433 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001434 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1436 break;
1437 }
1438 if (rt)
1439 dst_hold(&rt->u.dst);
1440 write_unlock_bh(&rt6_lock);
1441 return rt;
1442}
1443
1444struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001445 struct net_device *dev,
1446 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447{
1448 struct in6_rtmsg rtmsg;
1449
1450 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1451 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1452 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1453 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001454 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1455 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456
1457 rtmsg.rtmsg_ifindex = dev->ifindex;
1458
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001459 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 return rt6_get_dflt_router(gwaddr, dev);
1461}
1462
1463void rt6_purge_dflt_routers(void)
1464{
1465 struct rt6_info *rt;
1466
1467restart:
1468 read_lock_bh(&rt6_lock);
1469 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1470 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1471 dst_hold(&rt->u.dst);
1472
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 read_unlock_bh(&rt6_lock);
1474
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001475 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476
1477 goto restart;
1478 }
1479 }
1480 read_unlock_bh(&rt6_lock);
1481}
1482
1483int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1484{
1485 struct in6_rtmsg rtmsg;
1486 int err;
1487
1488 switch(cmd) {
1489 case SIOCADDRT: /* Add a route */
1490 case SIOCDELRT: /* Delete a route */
1491 if (!capable(CAP_NET_ADMIN))
1492 return -EPERM;
1493 err = copy_from_user(&rtmsg, arg,
1494 sizeof(struct in6_rtmsg));
1495 if (err)
1496 return -EFAULT;
1497
1498 rtnl_lock();
1499 switch (cmd) {
1500 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001501 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 break;
1503 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001504 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 break;
1506 default:
1507 err = -EINVAL;
1508 }
1509 rtnl_unlock();
1510
1511 return err;
1512 };
1513
1514 return -EINVAL;
1515}
1516
1517/*
1518 * Drop the packet on the floor
1519 */
1520
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001521static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522{
1523 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1524 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1525 kfree_skb(skb);
1526 return 0;
1527}
1528
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001529static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530{
1531 skb->dev = skb->dst->dev;
1532 return ip6_pkt_discard(skb);
1533}
1534
1535/*
1536 * Allocate a dst for local (unicast / anycast) address.
1537 */
1538
1539struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1540 const struct in6_addr *addr,
1541 int anycast)
1542{
1543 struct rt6_info *rt = ip6_dst_alloc();
1544
1545 if (rt == NULL)
1546 return ERR_PTR(-ENOMEM);
1547
1548 dev_hold(&loopback_dev);
1549 in6_dev_hold(idev);
1550
1551 rt->u.dst.flags = DST_HOST;
1552 rt->u.dst.input = ip6_input;
1553 rt->u.dst.output = ip6_output;
1554 rt->rt6i_dev = &loopback_dev;
1555 rt->rt6i_idev = idev;
1556 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1557 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1558 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1559 rt->u.dst.obsolete = -1;
1560
1561 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001562 if (anycast)
1563 rt->rt6i_flags |= RTF_ANYCAST;
1564 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 rt->rt6i_flags |= RTF_LOCAL;
1566 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1567 if (rt->rt6i_nexthop == NULL) {
1568 dst_free((struct dst_entry *) rt);
1569 return ERR_PTR(-ENOMEM);
1570 }
1571
1572 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1573 rt->rt6i_dst.plen = 128;
1574
1575 atomic_set(&rt->u.dst.__refcnt, 1);
1576
1577 return rt;
1578}
1579
1580static int fib6_ifdown(struct rt6_info *rt, void *arg)
1581{
1582 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1583 rt != &ip6_null_entry) {
1584 RT6_TRACE("deleted by ifdown %p\n", rt);
1585 return -1;
1586 }
1587 return 0;
1588}
1589
1590void rt6_ifdown(struct net_device *dev)
1591{
1592 write_lock_bh(&rt6_lock);
1593 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1594 write_unlock_bh(&rt6_lock);
1595}
1596
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
1602
1603static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1604{
1605 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1606 struct inet6_dev *idev;
1607
1608 /* In IPv6 pmtu discovery is not optional,
1609 so that RTAX_MTU lock cannot disable it.
1610 We still use this lock to block changes
1611 caused by addrconf/ndisc.
1612 */
1613
1614 idev = __in6_dev_get(arg->dev);
1615 if (idev == NULL)
1616 return 0;
1617
1618 /* For administrative MTU increase, there is no way to discover
1619 IPv6 PMTU increase, so PMTU increase should be updated here.
1620 Since RFC 1981 doesn't include administrative MTU increase
1621 update PMTU increase is a MUST. (i.e. jumbo frame)
1622 */
1623 /*
1624 If new MTU is less than route PMTU, this new MTU will be the
1625 lowest MTU in the path, update the route PMTU to reflect PMTU
1626 decreases; if new MTU is greater than route PMTU, and the
1627 old MTU is the lowest MTU in the path, update the route PMTU
1628 to reflect the increase. In this case if the other nodes' MTU
1629 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1630 PMTU discouvery.
1631 */
1632 if (rt->rt6i_dev == arg->dev &&
1633 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1634 (dst_mtu(&rt->u.dst) > arg->mtu ||
1635 (dst_mtu(&rt->u.dst) < arg->mtu &&
1636 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1637 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1638 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1639 return 0;
1640}
1641
1642void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1643{
1644 struct rt6_mtu_change_arg arg;
1645
1646 arg.dev = dev;
1647 arg.mtu = mtu;
1648 read_lock_bh(&rt6_lock);
1649 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1650 read_unlock_bh(&rt6_lock);
1651}
1652
1653static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1654 struct in6_rtmsg *rtmsg)
1655{
1656 memset(rtmsg, 0, sizeof(*rtmsg));
1657
1658 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1659 rtmsg->rtmsg_src_len = r->rtm_src_len;
1660 rtmsg->rtmsg_flags = RTF_UP;
1661 if (r->rtm_type == RTN_UNREACHABLE)
1662 rtmsg->rtmsg_flags |= RTF_REJECT;
1663
1664 if (rta[RTA_GATEWAY-1]) {
1665 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1666 return -EINVAL;
1667 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1668 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1669 }
1670 if (rta[RTA_DST-1]) {
1671 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1672 return -EINVAL;
1673 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1674 }
1675 if (rta[RTA_SRC-1]) {
1676 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1677 return -EINVAL;
1678 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1679 }
1680 if (rta[RTA_OIF-1]) {
1681 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1682 return -EINVAL;
1683 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1684 }
1685 if (rta[RTA_PRIORITY-1]) {
1686 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1687 return -EINVAL;
1688 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1689 }
1690 return 0;
1691}
1692
1693int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1694{
1695 struct rtmsg *r = NLMSG_DATA(nlh);
1696 struct in6_rtmsg rtmsg;
1697
1698 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1699 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001700 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701}
1702
1703int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1704{
1705 struct rtmsg *r = NLMSG_DATA(nlh);
1706 struct in6_rtmsg rtmsg;
1707
1708 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1709 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001710 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711}
1712
/* Per-call context that inet6_dump_fib() hands to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* buffer the dump is written to */
	struct netlink_callback	*cb;	/* dump state of the request */
};
1718
1719static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001720 struct in6_addr *dst, struct in6_addr *src,
1721 int iif, int type, u32 pid, u32 seq,
1722 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723{
1724 struct rtmsg *rtm;
1725 struct nlmsghdr *nlh;
1726 unsigned char *b = skb->tail;
1727 struct rta_cacheinfo ci;
1728
1729 if (prefix) { /* user wants prefix routes only */
1730 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1731 /* success since this is not a prefix route */
1732 return 1;
1733 }
1734 }
1735
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001736 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 rtm = NLMSG_DATA(nlh);
1738 rtm->rtm_family = AF_INET6;
1739 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1740 rtm->rtm_src_len = rt->rt6i_src.plen;
1741 rtm->rtm_tos = 0;
1742 rtm->rtm_table = RT_TABLE_MAIN;
1743 if (rt->rt6i_flags&RTF_REJECT)
1744 rtm->rtm_type = RTN_UNREACHABLE;
1745 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1746 rtm->rtm_type = RTN_LOCAL;
1747 else
1748 rtm->rtm_type = RTN_UNICAST;
1749 rtm->rtm_flags = 0;
1750 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1751 rtm->rtm_protocol = rt->rt6i_protocol;
1752 if (rt->rt6i_flags&RTF_DYNAMIC)
1753 rtm->rtm_protocol = RTPROT_REDIRECT;
1754 else if (rt->rt6i_flags & RTF_ADDRCONF)
1755 rtm->rtm_protocol = RTPROT_KERNEL;
1756 else if (rt->rt6i_flags&RTF_DEFAULT)
1757 rtm->rtm_protocol = RTPROT_RA;
1758
1759 if (rt->rt6i_flags&RTF_CACHE)
1760 rtm->rtm_flags |= RTM_F_CLONED;
1761
1762 if (dst) {
1763 RTA_PUT(skb, RTA_DST, 16, dst);
1764 rtm->rtm_dst_len = 128;
1765 } else if (rtm->rtm_dst_len)
1766 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1767#ifdef CONFIG_IPV6_SUBTREES
1768 if (src) {
1769 RTA_PUT(skb, RTA_SRC, 16, src);
1770 rtm->rtm_src_len = 128;
1771 } else if (rtm->rtm_src_len)
1772 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1773#endif
1774 if (iif)
1775 RTA_PUT(skb, RTA_IIF, 4, &iif);
1776 else if (dst) {
1777 struct in6_addr saddr_buf;
1778 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1779 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1780 }
1781 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1782 goto rtattr_failure;
1783 if (rt->u.dst.neighbour)
1784 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1785 if (rt->u.dst.dev)
1786 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1787 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1788 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1789 if (rt->rt6i_expires)
1790 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1791 else
1792 ci.rta_expires = 0;
1793 ci.rta_used = rt->u.dst.__use;
1794 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1795 ci.rta_error = rt->u.dst.error;
1796 ci.rta_id = 0;
1797 ci.rta_ts = 0;
1798 ci.rta_tsage = 0;
1799 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1800 nlh->nlmsg_len = skb->tail - b;
1801 return skb->len;
1802
1803nlmsg_failure:
1804rtattr_failure:
1805 skb_trim(skb, b - skb->data);
1806 return -1;
1807}
1808
1809static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1810{
1811 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1812 int prefix;
1813
1814 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1815 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1816 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1817 } else
1818 prefix = 0;
1819
1820 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1821 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001822 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823}
1824
1825static int fib6_dump_node(struct fib6_walker_t *w)
1826{
1827 int res;
1828 struct rt6_info *rt;
1829
1830 for (rt = w->leaf; rt; rt = rt->u.next) {
1831 res = rt6_dump_route(rt, w->args);
1832 if (res < 0) {
1833 /* Frame is full, suspend walking */
1834 w->leaf = rt;
1835 return 1;
1836 }
1837 BUG_TRAP(res!=0);
1838 }
1839 w->leaf = NULL;
1840 return 0;
1841}
1842
1843static void fib6_dump_end(struct netlink_callback *cb)
1844{
1845 struct fib6_walker_t *w = (void*)cb->args[0];
1846
1847 if (w) {
1848 cb->args[0] = 0;
1849 fib6_walker_unlink(w);
1850 kfree(w);
1851 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001852 cb->done = (void*)cb->args[1];
1853 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854}
1855
1856static int fib6_dump_done(struct netlink_callback *cb)
1857{
1858 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001859 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860}
1861
1862int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1863{
1864 struct rt6_rtnl_dump_arg arg;
1865 struct fib6_walker_t *w;
1866 int res;
1867
1868 arg.skb = skb;
1869 arg.cb = cb;
1870
1871 w = (void*)cb->args[0];
1872 if (w == NULL) {
1873 /* New dump:
1874 *
1875 * 1. hook callback destructor.
1876 */
1877 cb->args[1] = (long)cb->done;
1878 cb->done = fib6_dump_done;
1879
1880 /*
1881 * 2. allocate and initialize walker.
1882 */
Ingo Oeser0c600ed2006-03-20 23:01:32 -08001883 w = kzalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 if (w == NULL)
1885 return -ENOMEM;
1886 RT6_TRACE("dump<%p", w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 w->root = &ip6_routing_table;
1888 w->func = fib6_dump_node;
1889 w->args = &arg;
1890 cb->args[0] = (long)w;
1891 read_lock_bh(&rt6_lock);
1892 res = fib6_walk(w);
1893 read_unlock_bh(&rt6_lock);
1894 } else {
1895 w->args = &arg;
1896 read_lock_bh(&rt6_lock);
1897 res = fib6_walk_continue(w);
1898 read_unlock_bh(&rt6_lock);
1899 }
1900#if RT6_DEBUG >= 3
1901 if (res <= 0 && skb->len == 0)
1902 RT6_TRACE("%p>dump end\n", w);
1903#endif
1904 res = res < 0 ? res : skb->len;
1905 /* res < 0 is an error. (really, impossible)
1906 res == 0 means that dump is complete, but skb still can contain data.
1907 res > 0 dump is not complete, but frame is full.
1908 */
1909 /* Destroy walker, if dump of this table is complete. */
1910 if (res <= 0)
1911 fib6_dump_end(cb);
1912 return res;
1913}
1914
1915int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1916{
1917 struct rtattr **rta = arg;
1918 int iif = 0;
1919 int err = -ENOBUFS;
1920 struct sk_buff *skb;
1921 struct flowi fl;
1922 struct rt6_info *rt;
1923
1924 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1925 if (skb == NULL)
1926 goto out;
1927
1928 /* Reserve room for dummy headers, this skb can pass
1929 through good chunk of routing engine.
1930 */
1931 skb->mac.raw = skb->data;
1932 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1933
1934 memset(&fl, 0, sizeof(fl));
1935 if (rta[RTA_SRC-1])
1936 ipv6_addr_copy(&fl.fl6_src,
1937 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1938 if (rta[RTA_DST-1])
1939 ipv6_addr_copy(&fl.fl6_dst,
1940 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1941
1942 if (rta[RTA_IIF-1])
1943 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1944
1945 if (iif) {
1946 struct net_device *dev;
1947 dev = __dev_get_by_index(iif);
1948 if (!dev) {
1949 err = -ENODEV;
1950 goto out_free;
1951 }
1952 }
1953
1954 fl.oif = 0;
1955 if (rta[RTA_OIF-1])
1956 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1957
1958 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1959
1960 skb->dst = &rt->u.dst;
1961
1962 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1963 err = rt6_fill_node(skb, rt,
1964 &fl.fl6_dst, &fl.fl6_src,
1965 iif,
1966 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001967 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 if (err < 0) {
1969 err = -EMSGSIZE;
1970 goto out_free;
1971 }
1972
1973 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1974 if (err > 0)
1975 err = 0;
1976out:
1977 return err;
1978out_free:
1979 kfree_skb(skb);
1980 goto out;
1981}
1982
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001983void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1984 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
1986 struct sk_buff *skb;
1987 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001988 u32 pid = current->pid;
1989 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001991 if (req)
1992 pid = req->pid;
1993 if (nlh)
1994 seq = nlh->nlmsg_seq;
1995
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996 skb = alloc_skb(size, gfp_any());
1997 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001998 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 return;
2000 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002001 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002003 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 return;
2005 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002006 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2007 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008}
2009
/*
 *	/proc interface: the "ipv6_route" and "rt6_stats" proc_net entries
 */
2013
2014#ifdef CONFIG_PROC_FS
2015
/*
 * Size in bytes (150) of one record emitted by rt6_info_route():
 * three 32-digit hex addresses, two 4-byte " xx " prefix-length fields,
 * and a 46-byte tail (" %08x" x 4, " %8s", newline).  The tail is only
 * that long while the device name is at most 8 characters.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2017
/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* destination for the formatted records */
	int offset;	/* byte offset requested by the reader */
	int length;	/* no new record is started once len >= length */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
2026
2027static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2028{
2029 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2030 int i;
2031
2032 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2033 arg->skip++;
2034 return 0;
2035 }
2036
2037 if (arg->len >= arg->length)
2038 return 0;
2039
2040 for (i=0; i<16; i++) {
2041 sprintf(arg->buffer + arg->len, "%02x",
2042 rt->rt6i_dst.addr.s6_addr[i]);
2043 arg->len += 2;
2044 }
2045 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2046 rt->rt6i_dst.plen);
2047
2048#ifdef CONFIG_IPV6_SUBTREES
2049 for (i=0; i<16; i++) {
2050 sprintf(arg->buffer + arg->len, "%02x",
2051 rt->rt6i_src.addr.s6_addr[i]);
2052 arg->len += 2;
2053 }
2054 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2055 rt->rt6i_src.plen);
2056#else
2057 sprintf(arg->buffer + arg->len,
2058 "00000000000000000000000000000000 00 ");
2059 arg->len += 36;
2060#endif
2061
2062 if (rt->rt6i_nexthop) {
2063 for (i=0; i<16; i++) {
2064 sprintf(arg->buffer + arg->len, "%02x",
2065 rt->rt6i_nexthop->primary_key[i]);
2066 arg->len += 2;
2067 }
2068 } else {
2069 sprintf(arg->buffer + arg->len,
2070 "00000000000000000000000000000000");
2071 arg->len += 32;
2072 }
2073 arg->len += sprintf(arg->buffer + arg->len,
2074 " %08x %08x %08x %08x %8s\n",
2075 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2076 rt->u.dst.__use, rt->rt6i_flags,
2077 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2078 return 0;
2079}
2080
2081static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2082{
2083 struct rt6_proc_arg arg;
2084 arg.buffer = buffer;
2085 arg.offset = offset;
2086 arg.length = length;
2087 arg.skip = 0;
2088 arg.len = 0;
2089
2090 read_lock_bh(&rt6_lock);
2091 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2092 read_unlock_bh(&rt6_lock);
2093
2094 *start = buffer;
2095 if (offset)
2096 *start += offset % RT6_INFO_LEN;
2097
2098 arg.len -= offset % RT6_INFO_LEN;
2099
2100 if (arg.len > length)
2101 arg.len = length;
2102 if (arg.len < 0)
2103 arg.len = 0;
2104
2105 return arg.len;
2106}
2107
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2109{
2110 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2111 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2112 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2113 rt6_stats.fib_rt_cache,
2114 atomic_read(&ip6_dst_ops.entries),
2115 rt6_stats.fib_discarded_routes);
2116
2117 return 0;
2118}
2119
2120static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2121{
2122 return single_open(file, rt6_stats_seq_show, NULL);
2123}
2124
2125static struct file_operations rt6_stats_seq_fops = {
2126 .owner = THIS_MODULE,
2127 .open = rt6_stats_seq_open,
2128 .read = seq_read,
2129 .llseek = seq_lseek,
2130 .release = single_release,
2131};
2132#endif /* CONFIG_PROC_FS */
2133
2134#ifdef CONFIG_SYSCTL
2135
2136static int flush_delay;
2137
2138static
2139int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2140 void __user *buffer, size_t *lenp, loff_t *ppos)
2141{
2142 if (write) {
2143 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2144 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2145 return 0;
2146 } else
2147 return -EINVAL;
2148}
2149
2150ctl_table ipv6_route_table[] = {
2151 {
2152 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2153 .procname = "flush",
2154 .data = &flush_delay,
2155 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002156 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157 .proc_handler = &ipv6_sysctl_rtcache_flush
2158 },
2159 {
2160 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2161 .procname = "gc_thresh",
2162 .data = &ip6_dst_ops.gc_thresh,
2163 .maxlen = sizeof(int),
2164 .mode = 0644,
2165 .proc_handler = &proc_dointvec,
2166 },
2167 {
2168 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2169 .procname = "max_size",
2170 .data = &ip6_rt_max_size,
2171 .maxlen = sizeof(int),
2172 .mode = 0644,
2173 .proc_handler = &proc_dointvec,
2174 },
2175 {
2176 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2177 .procname = "gc_min_interval",
2178 .data = &ip6_rt_gc_min_interval,
2179 .maxlen = sizeof(int),
2180 .mode = 0644,
2181 .proc_handler = &proc_dointvec_jiffies,
2182 .strategy = &sysctl_jiffies,
2183 },
2184 {
2185 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2186 .procname = "gc_timeout",
2187 .data = &ip6_rt_gc_timeout,
2188 .maxlen = sizeof(int),
2189 .mode = 0644,
2190 .proc_handler = &proc_dointvec_jiffies,
2191 .strategy = &sysctl_jiffies,
2192 },
2193 {
2194 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2195 .procname = "gc_interval",
2196 .data = &ip6_rt_gc_interval,
2197 .maxlen = sizeof(int),
2198 .mode = 0644,
2199 .proc_handler = &proc_dointvec_jiffies,
2200 .strategy = &sysctl_jiffies,
2201 },
2202 {
2203 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2204 .procname = "gc_elasticity",
2205 .data = &ip6_rt_gc_elasticity,
2206 .maxlen = sizeof(int),
2207 .mode = 0644,
2208 .proc_handler = &proc_dointvec_jiffies,
2209 .strategy = &sysctl_jiffies,
2210 },
2211 {
2212 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2213 .procname = "mtu_expires",
2214 .data = &ip6_rt_mtu_expires,
2215 .maxlen = sizeof(int),
2216 .mode = 0644,
2217 .proc_handler = &proc_dointvec_jiffies,
2218 .strategy = &sysctl_jiffies,
2219 },
2220 {
2221 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2222 .procname = "min_adv_mss",
2223 .data = &ip6_rt_min_advmss,
2224 .maxlen = sizeof(int),
2225 .mode = 0644,
2226 .proc_handler = &proc_dointvec_jiffies,
2227 .strategy = &sysctl_jiffies,
2228 },
2229 {
2230 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2231 .procname = "gc_min_interval_ms",
2232 .data = &ip6_rt_gc_min_interval,
2233 .maxlen = sizeof(int),
2234 .mode = 0644,
2235 .proc_handler = &proc_dointvec_ms_jiffies,
2236 .strategy = &sysctl_ms_jiffies,
2237 },
2238 { .ctl_name = 0 }
2239};
2240
2241#endif
2242
2243void __init ip6_route_init(void)
2244{
2245 struct proc_dir_entry *p;
2246
2247 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2248 sizeof(struct rt6_info),
2249 0, SLAB_HWCACHE_ALIGN,
2250 NULL, NULL);
2251 if (!ip6_dst_ops.kmem_cachep)
2252 panic("cannot create ip6_dst_cache");
2253
2254 fib6_init();
2255#ifdef CONFIG_PROC_FS
2256 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2257 if (p)
2258 p->owner = THIS_MODULE;
2259
2260 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2261#endif
2262#ifdef CONFIG_XFRM
2263 xfrm6_init();
2264#endif
2265}
2266
2267void ip6_route_cleanup(void)
2268{
2269#ifdef CONFIG_PROC_FS
2270 proc_net_remove("ipv6_route");
2271 proc_net_remove("rt6_stats");
2272#endif
2273#ifdef CONFIG_XFRM
2274 xfrm6_fini();
2275#endif
2276 rt6_ifdown(NULL);
2277 fib6_gc_cleanup();
2278 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2279}