blob: 8a777932786d7d4c0975fd6941489039abad3d06 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; raise to 3 to compile in tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: forward the arguments to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
/*
 * Compile-time switch read by ip6_route_input()/ip6_route_output():
 * when non-zero, a selected route that already has a nexthop (or is
 * RTF_NONEXTHOP) is cloned with rt6_alloc_clone(); when zero it is
 * used as-is.
 */
#define CLONE_OFFLINK_ROUTE 0

/*
 * Flags for the "strict" argument of rt6_select()/rt6_score_route():
 *   RT6_SELECT_F_IFACE     - reject routes whose device check scores 0
 *   RT6_SELECT_F_REACHABLE - reject routes whose neighbour check scores 0
 */
#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
/* Reader/writer lock guarding the whole IPv6 FIB. */

DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not
 * in a NUD_VALID state and was last updated more than
 * cnf.rtr_probe_interval ago, send it a Neighbour Solicitation via the
 * solicited-node multicast address.  A NULL @rt or a route without a
 * nexthop is ignored.
 *
 * The probe has to be rate-limited; stamping neigh->updated before
 * sending is what enforces the interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int due;

	neigh = rt ? rt->rt6i_nexthop : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* The solicitation itself is sent with the lock dropped. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800266 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277}
278
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800279static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700283 if (rt->rt6i_flags & RTF_NONEXTHOP ||
284 !(rt->rt6i_flags & RTF_GATEWAY))
285 m = 1;
286 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800287 read_lock_bh(&neigh->lock);
288 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700289 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800290 read_unlock_bh(&neigh->lock);
291 }
292 return m;
293}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800295static int rt6_score_route(struct rt6_info *rt, int oif,
296 int strict)
297{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700298 int m, n;
299
300 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800301 if (!m && (strict & RT6_SELECT_F_IFACE))
302 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800303#ifdef CONFIG_IPV6_ROUTER_PREF
304 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
305#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700306 n = rt6_check_neigh(rt);
307 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800308 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700309 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310 return -1;
311 return m;
312}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800314static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
315 int strict)
316{
317 struct rt6_info *match = NULL, *last = NULL;
318 struct rt6_info *rt, *rt0 = *head;
319 u32 metric;
320 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
323 __FUNCTION__, head, head ? *head : NULL, oif);
324
325 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700326 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 rt = rt->u.next) {
328 int m;
329
330 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 continue;
332
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 m = rt6_score_route(rt, oif, strict);
336 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800339 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800340 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800343 } else {
344 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 }
346 }
347
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 if (!match &&
349 (strict & RT6_SELECT_F_REACHABLE) &&
350 last && last != rt0) {
351 /* no entries matched; do round-robin */
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700352 static spinlock_t lock = SPIN_LOCK_UNLOCKED;
353 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354 *head = rt0->u.next;
355 rt0->u.next = last->u.next;
356 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700357 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 }
359
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800360 RT6_TRACE("%s() => %p, score=%d\n",
361 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364}
365
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, updates or deletes the matching
 * route: a zero lifetime deletes an existing route, a non-zero lifetime
 * creates the route if needed and refreshes its preference and expiry,
 * and 0xffffffff means "never expires".
 *
 * The option's Length field counts 8-octet units including the 8-octet
 * fixed header, so the prefix bytes present are (length - 1) * 8.  Per
 * RFC 4191 a prefix length above 64 needs Length 3 and a non-zero
 * prefix length needs Length 2 or 3; anything else would make us read
 * prefix bytes the option does not contain.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* Compare as signed so that a negative length is rejected too. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (len < (rinfo->length << 3))
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Fewer than 16 prefix bytes are present; copy only the
		 * prefix_len bits that the checks above proved to exist.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
443
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
445 int oif, int strict)
446{
447 struct fib6_node *fn;
448 struct rt6_info *rt;
449
450 read_lock_bh(&rt6_lock);
451 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
452 rt = rt6_device_match(fn->leaf, oif, strict);
453 dst_hold(&rt->u.dst);
454 rt->u.dst.__use++;
455 read_unlock_bh(&rt6_lock);
456
457 rt->u.dst.lastuse = jiffies;
458 if (rt->u.dst.error == 0)
459 return rt;
460 dst_release(&rt->u.dst);
461 return NULL;
462}
463
464/* ip6_ins_rt is called with FREE rt6_lock.
465 It takes new route entry, the addition fails by any reason the
466 route is freed. In any case, if caller does not hold it, it may
467 be destroyed.
468 */
469
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700470int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
471 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472{
473 int err;
474
475 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700476 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 write_unlock_bh(&rt6_lock);
478
479 return err;
480}
481
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800482static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
483 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 struct rt6_info *rt;
486
487 /*
488 * Clone the route.
489 */
490
491 rt = ip6_rt_copy(ort);
492
493 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900494 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
495 if (rt->rt6i_dst.plen != 128 &&
496 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
497 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900499 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900501 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 rt->rt6i_dst.plen = 128;
503 rt->rt6i_flags |= RTF_CACHE;
504 rt->u.dst.flags |= DST_HOST;
505
506#ifdef CONFIG_IPV6_SUBTREES
507 if (rt->rt6i_src.plen && saddr) {
508 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
509 rt->rt6i_src.plen = 128;
510 }
511#endif
512
513 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
514
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800515 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800517 return rt;
518}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800520static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
521{
522 struct rt6_info *rt = ip6_rt_copy(ort);
523 if (rt) {
524 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
525 rt->rt6i_dst.plen = 128;
526 rt->rt6i_flags |= RTF_CACHE;
527 if (rt->rt6i_flags & RTF_REJECT)
528 rt->u.dst.error = ort->u.dst.error;
529 rt->u.dst.flags |= DST_HOST;
530 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
531 }
532 return rt;
533}
534
/*
 * BACKTRACK() - climb the FIB tree after a failed selection.
 *
 * Expects locals "rt" and "fn" and labels "out" and "restart" in the
 * calling function.  When rt is the null entry, walk up through the
 * parents of fn: reaching a root node jumps to "out"; the first node
 * that carries route info jumps to "restart" with fn pointing at it.
 */
#define BACKTRACK() \
if (rt == &ip6_null_entry) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) \
			goto out; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
545
546
547void ip6_route_input(struct sk_buff *skb)
548{
549 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800550 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 int strict;
552 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800553 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800554 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800556 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
558relookup:
559 read_lock_bh(&rt6_lock);
560
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800561restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
563 &skb->nh.ipv6h->saddr);
564
565restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800566 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800568 if (rt == &ip6_null_entry ||
569 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800570 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800572 dst_hold(&rt->u.dst);
573 read_unlock_bh(&rt6_lock);
574
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800575 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
576 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
577 else {
578#if CLONE_OFFLINK_ROUTE
579 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
580#else
581 goto out2;
582#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800585 dst_release(&rt->u.dst);
586 rt = nrt ? : &ip6_null_entry;
587
588 dst_hold(&rt->u.dst);
589 if (nrt) {
590 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
591 if (!err)
592 goto out2;
593 }
594
595 if (--attempts <= 0)
596 goto out2;
597
598 /*
599 * Race condition! In the gap, when rt6_lock was
600 * released someone could insert this route. Relookup.
601 */
602 dst_release(&rt->u.dst);
603 goto relookup;
604
605out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800606 if (reachable) {
607 reachable = 0;
608 goto restart_2;
609 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800610 dst_hold(&rt->u.dst);
611 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612out2:
613 rt->u.dst.lastuse = jiffies;
614 rt->u.dst.__use++;
615 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800616 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617}
618
619struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
620{
621 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800622 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 int strict;
624 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800625 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800626 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800628 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
630relookup:
631 read_lock_bh(&rt6_lock);
632
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800633restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
635
636restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800637 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800638 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800639 if (rt == &ip6_null_entry ||
640 rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800643 dst_hold(&rt->u.dst);
644 read_unlock_bh(&rt6_lock);
645
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800646 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800647 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800648 else {
649#if CLONE_OFFLINK_ROUTE
650 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
651#else
652 goto out2;
653#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800655
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800656 dst_release(&rt->u.dst);
657 rt = nrt ? : &ip6_null_entry;
658
659 dst_hold(&rt->u.dst);
660 if (nrt) {
661 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
662 if (!err)
663 goto out2;
664 }
665
666 if (--attempts <= 0)
667 goto out2;
668
669 /*
670 * Race condition! In the gap, when rt6_lock was
671 * released someone could insert this route. Relookup.
672 */
673 dst_release(&rt->u.dst);
674 goto relookup;
675
676out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800677 if (reachable) {
678 reachable = 0;
679 goto restart_2;
680 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800681 dst_hold(&rt->u.dst);
682 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683out2:
684 rt->u.dst.lastuse = jiffies;
685 rt->u.dst.__use++;
686 return &rt->u.dst;
687}
688
689
690/*
691 * Destination cache support functions
692 */
693
694static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
695{
696 struct rt6_info *rt;
697
698 rt = (struct rt6_info *) dst;
699
700 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
701 return dst;
702
703 return NULL;
704}
705
706static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
707{
708 struct rt6_info *rt = (struct rt6_info *) dst;
709
710 if (rt) {
711 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700712 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 else
714 dst_release(dst);
715 }
716 return NULL;
717}
718
719static void ip6_link_failure(struct sk_buff *skb)
720{
721 struct rt6_info *rt;
722
723 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
724
725 rt = (struct rt6_info *) skb->dst;
726 if (rt) {
727 if (rt->rt6i_flags&RTF_CACHE) {
728 dst_set_expires(&rt->u.dst, 0);
729 rt->rt6i_flags |= RTF_EXPIRES;
730 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
731 rt->rt6i_node->fn_sernum = -1;
732 }
733}
734
735static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
736{
737 struct rt6_info *rt6 = (struct rt6_info*)dst;
738
739 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
740 rt6->rt6i_flags |= RTF_MODIFIED;
741 if (mtu < IPV6_MIN_MTU) {
742 mtu = IPV6_MIN_MTU;
743 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
744 }
745 dst->metrics[RTAX_MTU-1] = mtu;
746 }
747}
748
/*
 * List of dst entries created by ndisc_dst_alloc(), linked through
 * dst.next and reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
752
753static inline unsigned int ipv6_advmss(unsigned int mtu)
754{
755 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
756
757 if (mtu < ip6_rt_min_advmss)
758 mtu = ip6_rt_min_advmss;
759
760 /*
761 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
762 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
763 * IPV6_MAXPLEN is also valid and means: "any MSS,
764 * rely only on pmtu discovery"
765 */
766 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
767 mtu = IPV6_MAXPLEN;
768 return mtu;
769}
770
771struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
772 struct neighbour *neigh,
773 struct in6_addr *addr,
774 int (*output)(struct sk_buff *))
775{
776 struct rt6_info *rt;
777 struct inet6_dev *idev = in6_dev_get(dev);
778
779 if (unlikely(idev == NULL))
780 return NULL;
781
782 rt = ip6_dst_alloc();
783 if (unlikely(rt == NULL)) {
784 in6_dev_put(idev);
785 goto out;
786 }
787
788 dev_hold(dev);
789 if (neigh)
790 neigh_hold(neigh);
791 else
792 neigh = ndisc_get_neigh(dev, addr);
793
794 rt->rt6i_dev = dev;
795 rt->rt6i_idev = idev;
796 rt->rt6i_nexthop = neigh;
797 atomic_set(&rt->u.dst.__refcnt, 1);
798 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
799 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
800 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
801 rt->u.dst.output = output;
802
803#if 0 /* there's no chance to use these for ndisc */
804 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
805 ? DST_HOST
806 : 0;
807 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
808 rt->rt6i_dst.plen = 128;
809#endif
810
811 write_lock_bh(&rt6_lock);
812 rt->u.dst.next = ndisc_dst_gc_list;
813 ndisc_dst_gc_list = &rt->u.dst;
814 write_unlock_bh(&rt6_lock);
815
816 fib6_force_start_gc();
817
818out:
819 return (struct dst_entry *)rt;
820}
821
822int ndisc_dst_gc(int *more)
823{
824 struct dst_entry *dst, *next, **pprev;
825 int freed;
826
827 next = NULL;
828 pprev = &ndisc_dst_gc_list;
829 freed = 0;
830 while ((dst = *pprev) != NULL) {
831 if (!atomic_read(&dst->__refcnt)) {
832 *pprev = dst->next;
833 dst_free(dst);
834 freed++;
835 } else {
836 pprev = &dst->next;
837 (*more)++;
838 }
839 }
840
841 return freed;
842}
843
844static int ip6_dst_gc(void)
845{
846 static unsigned expire = 30*HZ;
847 static unsigned long last_gc;
848 unsigned long now = jiffies;
849
850 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
851 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
852 goto out;
853
854 expire++;
855 fib6_run_gc(expire);
856 last_gc = now;
857 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
858 expire = ip6_rt_gc_timeout>>1;
859
860out:
861 expire -= expire>>ip6_rt_gc_elasticity;
862 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
863}
864
865/* Clean host part of a prefix. Not necessary in radix tree,
866 but results in cleaner routing tables.
867
868 Remove it only when all the things will work!
869 */
870
871static int ipv6_get_mtu(struct net_device *dev)
872{
873 int mtu = IPV6_MIN_MTU;
874 struct inet6_dev *idev;
875
876 idev = in6_dev_get(dev);
877 if (idev) {
878 mtu = idev->cnf.mtu6;
879 in6_dev_put(idev);
880 }
881 return mtu;
882}
883
884int ipv6_get_hoplimit(struct net_device *dev)
885{
886 int hoplimit = ipv6_devconf.hop_limit;
887 struct inet6_dev *idev;
888
889 idev = in6_dev_get(dev);
890 if (idev) {
891 hoplimit = idev->cnf.hop_limit;
892 in6_dev_put(idev);
893 }
894 return hoplimit;
895}
896
897/*
898 *
899 */
900
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700901int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
902 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903{
904 int err;
905 struct rtmsg *r;
906 struct rtattr **rta;
907 struct rt6_info *rt = NULL;
908 struct net_device *dev = NULL;
909 struct inet6_dev *idev = NULL;
910 int addr_type;
911
912 rta = (struct rtattr **) _rtattr;
913
914 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
915 return -EINVAL;
916#ifndef CONFIG_IPV6_SUBTREES
917 if (rtmsg->rtmsg_src_len)
918 return -EINVAL;
919#endif
920 if (rtmsg->rtmsg_ifindex) {
921 err = -ENODEV;
922 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
923 if (!dev)
924 goto out;
925 idev = in6_dev_get(dev);
926 if (!idev)
927 goto out;
928 }
929
930 if (rtmsg->rtmsg_metric == 0)
931 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
932
933 rt = ip6_dst_alloc();
934
935 if (rt == NULL) {
936 err = -ENOMEM;
937 goto out;
938 }
939
940 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800941 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 if (nlh && (r = NLMSG_DATA(nlh))) {
943 rt->rt6i_protocol = r->rtm_protocol;
944 } else {
945 rt->rt6i_protocol = RTPROT_BOOT;
946 }
947
948 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
949
950 if (addr_type & IPV6_ADDR_MULTICAST)
951 rt->u.dst.input = ip6_mc_input;
952 else
953 rt->u.dst.input = ip6_forward;
954
955 rt->u.dst.output = ip6_output;
956
957 ipv6_addr_prefix(&rt->rt6i_dst.addr,
958 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
959 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
960 if (rt->rt6i_dst.plen == 128)
961 rt->u.dst.flags = DST_HOST;
962
963#ifdef CONFIG_IPV6_SUBTREES
964 ipv6_addr_prefix(&rt->rt6i_src.addr,
965 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
966 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
967#endif
968
969 rt->rt6i_metric = rtmsg->rtmsg_metric;
970
971 /* We cannot add true routes via loopback here,
972 they would result in kernel looping; promote them to reject routes
973 */
974 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
975 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
976 /* hold loopback dev/idev if we haven't done so. */
977 if (dev != &loopback_dev) {
978 if (dev) {
979 dev_put(dev);
980 in6_dev_put(idev);
981 }
982 dev = &loopback_dev;
983 dev_hold(dev);
984 idev = in6_dev_get(dev);
985 if (!idev) {
986 err = -ENODEV;
987 goto out;
988 }
989 }
990 rt->u.dst.output = ip6_pkt_discard_out;
991 rt->u.dst.input = ip6_pkt_discard;
992 rt->u.dst.error = -ENETUNREACH;
993 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
994 goto install_route;
995 }
996
997 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
998 struct in6_addr *gw_addr;
999 int gwa_type;
1000
1001 gw_addr = &rtmsg->rtmsg_gateway;
1002 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1003 gwa_type = ipv6_addr_type(gw_addr);
1004
1005 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1006 struct rt6_info *grt;
1007
1008 /* IPv6 strictly inhibits using not link-local
1009 addresses as nexthop address.
1010 Otherwise, router will not able to send redirects.
1011 It is very good, but in some (rare!) circumstances
1012 (SIT, PtP, NBMA NOARP links) it is handy to allow
1013 some exceptions. --ANK
1014 */
1015 err = -EINVAL;
1016 if (!(gwa_type&IPV6_ADDR_UNICAST))
1017 goto out;
1018
1019 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1020
1021 err = -EHOSTUNREACH;
1022 if (grt == NULL)
1023 goto out;
1024 if (dev) {
1025 if (dev != grt->rt6i_dev) {
1026 dst_release(&grt->u.dst);
1027 goto out;
1028 }
1029 } else {
1030 dev = grt->rt6i_dev;
1031 idev = grt->rt6i_idev;
1032 dev_hold(dev);
1033 in6_dev_hold(grt->rt6i_idev);
1034 }
1035 if (!(grt->rt6i_flags&RTF_GATEWAY))
1036 err = 0;
1037 dst_release(&grt->u.dst);
1038
1039 if (err)
1040 goto out;
1041 }
1042 err = -EINVAL;
1043 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1044 goto out;
1045 }
1046
1047 err = -ENODEV;
1048 if (dev == NULL)
1049 goto out;
1050
1051 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1052 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1053 if (IS_ERR(rt->rt6i_nexthop)) {
1054 err = PTR_ERR(rt->rt6i_nexthop);
1055 rt->rt6i_nexthop = NULL;
1056 goto out;
1057 }
1058 }
1059
1060 rt->rt6i_flags = rtmsg->rtmsg_flags;
1061
1062install_route:
1063 if (rta && rta[RTA_METRICS-1]) {
1064 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1065 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1066
1067 while (RTA_OK(attr, attrlen)) {
1068 unsigned flavor = attr->rta_type;
1069 if (flavor) {
1070 if (flavor > RTAX_MAX) {
1071 err = -EINVAL;
1072 goto out;
1073 }
1074 rt->u.dst.metrics[flavor-1] =
1075 *(u32 *)RTA_DATA(attr);
1076 }
1077 attr = RTA_NEXT(attr, attrlen);
1078 }
1079 }
1080
1081 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1082 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1083 if (!rt->u.dst.metrics[RTAX_MTU-1])
1084 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1085 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1086 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1087 rt->u.dst.dev = dev;
1088 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001089 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090
1091out:
1092 if (dev)
1093 dev_put(dev);
1094 if (idev)
1095 in6_dev_put(idev);
1096 if (rt)
1097 dst_free((struct dst_entry *) rt);
1098 return err;
1099}
1100
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001101int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102{
1103 int err;
1104
1105 write_lock_bh(&rt6_lock);
1106
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001107 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 dst_release(&rt->u.dst);
1109
1110 write_unlock_bh(&rt6_lock);
1111
1112 return err;
1113}
1114
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001115static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116{
1117 struct fib6_node *fn;
1118 struct rt6_info *rt;
1119 int err = -ESRCH;
1120
1121 read_lock_bh(&rt6_lock);
1122
1123 fn = fib6_locate(&ip6_routing_table,
1124 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1125 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1126
1127 if (fn) {
1128 for (rt = fn->leaf; rt; rt = rt->u.next) {
1129 if (rtmsg->rtmsg_ifindex &&
1130 (rt->rt6i_dev == NULL ||
1131 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1132 continue;
1133 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1134 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1135 continue;
1136 if (rtmsg->rtmsg_metric &&
1137 rtmsg->rtmsg_metric != rt->rt6i_metric)
1138 continue;
1139 dst_hold(&rt->u.dst);
1140 read_unlock_bh(&rt6_lock);
1141
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001142 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 }
1144 }
1145 read_unlock_bh(&rt6_lock);
1146
1147 return err;
1148}
1149
1150/*
1151 * Handle redirects
1152 */
1153void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1154 struct neighbour *neigh, u8 *lladdr, int on_link)
1155{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001156 struct rt6_info *rt, *nrt = NULL;
1157 int strict;
1158 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
1160 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001161 * Get the "current" route for this destination and
1162 * check if the redirect has come from approriate router.
1163 *
1164 * RFC 2461 specifies that redirects should only be
1165 * accepted if they come from the nexthop to the target.
1166 * Due to the way the routes are chosen, this notion
1167 * is a bit fuzzy and one might need to check all possible
1168 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 */
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001170 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001172 read_lock_bh(&rt6_lock);
1173 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1174restart:
1175 for (rt = fn->leaf; rt; rt = rt->u.next) {
1176 /*
1177 * Current route is on-link; redirect is always invalid.
1178 *
1179 * Seems, previous statement is not true. It could
1180 * be node, which looks for us as on-link (f.e. proxy ndisc)
1181 * But then router serving it might decide, that we should
1182 * know truth 8)8) --ANK (980726).
1183 */
1184 if (rt6_check_expired(rt))
1185 continue;
1186 if (!(rt->rt6i_flags & RTF_GATEWAY))
1187 continue;
1188 if (neigh->dev != rt->rt6i_dev)
1189 continue;
1190 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1191 continue;
1192 break;
1193 }
1194 if (rt)
1195 dst_hold(&rt->u.dst);
1196 else if (strict) {
1197 while ((fn = fn->parent) != NULL) {
1198 if (fn->fn_flags & RTN_ROOT)
1199 break;
1200 if (fn->fn_flags & RTN_RTINFO)
1201 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001203 }
1204 read_unlock_bh(&rt6_lock);
1205
1206 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 if (net_ratelimit())
1208 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1209 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001210 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 }
1212
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 /*
1214 * We have finally decided to accept it.
1215 */
1216
1217 neigh_update(neigh, lladdr, NUD_STALE,
1218 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1219 NEIGH_UPDATE_F_OVERRIDE|
1220 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1221 NEIGH_UPDATE_F_ISROUTER))
1222 );
1223
1224 /*
1225 * Redirect received -> path was valid.
1226 * Look, redirects are sent only in response to data packets,
1227 * so that this nexthop apparently is reachable. --ANK
1228 */
1229 dst_confirm(&rt->u.dst);
1230
1231 /* Duplicate redirect: silently ignore. */
1232 if (neigh == rt->u.dst.neighbour)
1233 goto out;
1234
1235 nrt = ip6_rt_copy(rt);
1236 if (nrt == NULL)
1237 goto out;
1238
1239 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1240 if (on_link)
1241 nrt->rt6i_flags &= ~RTF_GATEWAY;
1242
1243 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1244 nrt->rt6i_dst.plen = 128;
1245 nrt->u.dst.flags |= DST_HOST;
1246
1247 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1248 nrt->rt6i_nexthop = neigh_clone(neigh);
1249 /* Reset pmtu, it may be better */
1250 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1251 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1252
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001253 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 goto out;
1255
1256 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001257 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 return;
1259 }
1260
1261out:
1262 dst_release(&rt->u.dst);
1263 return;
1264}
1265
1266/*
1267 * Handle ICMP "packet too big" messages
1268 * i.e. Path MTU discovery
1269 */
1270
1271void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1272 struct net_device *dev, u32 pmtu)
1273{
1274 struct rt6_info *rt, *nrt;
1275 int allfrag = 0;
1276
1277 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1278 if (rt == NULL)
1279 return;
1280
1281 if (pmtu >= dst_mtu(&rt->u.dst))
1282 goto out;
1283
1284 if (pmtu < IPV6_MIN_MTU) {
1285 /*
1286 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1287 * MTU (1280) and a fragment header should always be included
1288 * after a node receiving Too Big message reporting PMTU is
1289 * less than the IPv6 Minimum Link MTU.
1290 */
1291 pmtu = IPV6_MIN_MTU;
1292 allfrag = 1;
1293 }
1294
1295 /* New mtu received -> path was valid.
1296 They are sent only in response to data packets,
1297 so that this nexthop apparently is reachable. --ANK
1298 */
1299 dst_confirm(&rt->u.dst);
1300
1301 /* Host route. If it is static, it would be better
1302 not to override it, but add new one, so that
1303 when cache entry will expire old pmtu
1304 would return automatically.
1305 */
1306 if (rt->rt6i_flags & RTF_CACHE) {
1307 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1308 if (allfrag)
1309 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1310 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1311 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1312 goto out;
1313 }
1314
1315 /* Network route.
1316 Two cases are possible:
1317 1. It is connected route. Action: COW
1318 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1319 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001320 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001321 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001322 else
1323 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001324
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001325 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001326 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1327 if (allfrag)
1328 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1329
1330 /* According to RFC 1981, detecting PMTU increase shouldn't be
1331 * happened within 5 mins, the recommended timer is 10 mins.
1332 * Here this route expiration time is set to ip6_rt_mtu_expires
1333 * which is 10 mins. After 10 mins the decreased pmtu is expired
1334 * and detecting PMTU increase will be automatically happened.
1335 */
1336 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1337 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1338
1339 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341out:
1342 dst_release(&rt->u.dst);
1343}
1344
1345/*
1346 * Misc support functions
1347 */
1348
1349static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1350{
1351 struct rt6_info *rt = ip6_dst_alloc();
1352
1353 if (rt) {
1354 rt->u.dst.input = ort->u.dst.input;
1355 rt->u.dst.output = ort->u.dst.output;
1356
1357 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1358 rt->u.dst.dev = ort->u.dst.dev;
1359 if (rt->u.dst.dev)
1360 dev_hold(rt->u.dst.dev);
1361 rt->rt6i_idev = ort->rt6i_idev;
1362 if (rt->rt6i_idev)
1363 in6_dev_hold(rt->rt6i_idev);
1364 rt->u.dst.lastuse = jiffies;
1365 rt->rt6i_expires = 0;
1366
1367 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1368 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1369 rt->rt6i_metric = 0;
1370
1371 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1372#ifdef CONFIG_IPV6_SUBTREES
1373 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1374#endif
1375 }
1376 return rt;
1377}
1378
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001379#ifdef CONFIG_IPV6_ROUTE_INFO
1380static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1381 struct in6_addr *gwaddr, int ifindex)
1382{
1383 struct fib6_node *fn;
1384 struct rt6_info *rt = NULL;
1385
1386 write_lock_bh(&rt6_lock);
1387 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1388 if (!fn)
1389 goto out;
1390
1391 for (rt = fn->leaf; rt; rt = rt->u.next) {
1392 if (rt->rt6i_dev->ifindex != ifindex)
1393 continue;
1394 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1395 continue;
1396 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1397 continue;
1398 dst_hold(&rt->u.dst);
1399 break;
1400 }
1401out:
1402 write_unlock_bh(&rt6_lock);
1403 return rt;
1404}
1405
1406static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1407 struct in6_addr *gwaddr, int ifindex,
1408 unsigned pref)
1409{
1410 struct in6_rtmsg rtmsg;
1411
1412 memset(&rtmsg, 0, sizeof(rtmsg));
1413 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1414 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1415 rtmsg.rtmsg_dst_len = prefixlen;
1416 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1417 rtmsg.rtmsg_metric = 1024;
1418 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001419 /* We should treat it as a default route if prefix length is 0. */
1420 if (!prefixlen)
1421 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001422 rtmsg.rtmsg_ifindex = ifindex;
1423
1424 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1425
1426 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1427}
1428#endif
1429
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1431{
1432 struct rt6_info *rt;
1433 struct fib6_node *fn;
1434
1435 fn = &ip6_routing_table;
1436
1437 write_lock_bh(&rt6_lock);
1438 for (rt = fn->leaf; rt; rt=rt->u.next) {
1439 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001440 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1442 break;
1443 }
1444 if (rt)
1445 dst_hold(&rt->u.dst);
1446 write_unlock_bh(&rt6_lock);
1447 return rt;
1448}
1449
1450struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001451 struct net_device *dev,
1452 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453{
1454 struct in6_rtmsg rtmsg;
1455
1456 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1457 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1458 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1459 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001460 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1461 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462
1463 rtmsg.rtmsg_ifindex = dev->ifindex;
1464
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001465 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 return rt6_get_dflt_router(gwaddr, dev);
1467}
1468
1469void rt6_purge_dflt_routers(void)
1470{
1471 struct rt6_info *rt;
1472
1473restart:
1474 read_lock_bh(&rt6_lock);
1475 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1476 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1477 dst_hold(&rt->u.dst);
1478
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 read_unlock_bh(&rt6_lock);
1480
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001481 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
1483 goto restart;
1484 }
1485 }
1486 read_unlock_bh(&rt6_lock);
1487}
1488
1489int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1490{
1491 struct in6_rtmsg rtmsg;
1492 int err;
1493
1494 switch(cmd) {
1495 case SIOCADDRT: /* Add a route */
1496 case SIOCDELRT: /* Delete a route */
1497 if (!capable(CAP_NET_ADMIN))
1498 return -EPERM;
1499 err = copy_from_user(&rtmsg, arg,
1500 sizeof(struct in6_rtmsg));
1501 if (err)
1502 return -EFAULT;
1503
1504 rtnl_lock();
1505 switch (cmd) {
1506 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001507 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 break;
1509 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001510 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 break;
1512 default:
1513 err = -EINVAL;
1514 }
1515 rtnl_unlock();
1516
1517 return err;
1518 };
1519
1520 return -EINVAL;
1521}
1522
1523/*
1524 * Drop the packet on the floor
1525 */
1526
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001527static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528{
1529 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1530 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1531 kfree_skb(skb);
1532 return 0;
1533}
1534
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001535static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536{
1537 skb->dev = skb->dst->dev;
1538 return ip6_pkt_discard(skb);
1539}
1540
1541/*
1542 * Allocate a dst for local (unicast / anycast) address.
1543 */
1544
1545struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1546 const struct in6_addr *addr,
1547 int anycast)
1548{
1549 struct rt6_info *rt = ip6_dst_alloc();
1550
1551 if (rt == NULL)
1552 return ERR_PTR(-ENOMEM);
1553
1554 dev_hold(&loopback_dev);
1555 in6_dev_hold(idev);
1556
1557 rt->u.dst.flags = DST_HOST;
1558 rt->u.dst.input = ip6_input;
1559 rt->u.dst.output = ip6_output;
1560 rt->rt6i_dev = &loopback_dev;
1561 rt->rt6i_idev = idev;
1562 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1563 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1564 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1565 rt->u.dst.obsolete = -1;
1566
1567 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001568 if (anycast)
1569 rt->rt6i_flags |= RTF_ANYCAST;
1570 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 rt->rt6i_flags |= RTF_LOCAL;
1572 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1573 if (rt->rt6i_nexthop == NULL) {
1574 dst_free((struct dst_entry *) rt);
1575 return ERR_PTR(-ENOMEM);
1576 }
1577
1578 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1579 rt->rt6i_dst.plen = 128;
1580
1581 atomic_set(&rt->u.dst.__refcnt, 1);
1582
1583 return rt;
1584}
1585
1586static int fib6_ifdown(struct rt6_info *rt, void *arg)
1587{
1588 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1589 rt != &ip6_null_entry) {
1590 RT6_TRACE("deleted by ifdown %p\n", rt);
1591 return -1;
1592 }
1593 return 0;
1594}
1595
1596void rt6_ifdown(struct net_device *dev)
1597{
1598 write_lock_bh(&rt6_lock);
1599 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1600 write_unlock_bh(&rt6_lock);
1601}
1602
/* Argument handed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU of that device */
};
1608
1609static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1610{
1611 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1612 struct inet6_dev *idev;
1613
1614 /* In IPv6 pmtu discovery is not optional,
1615 so that RTAX_MTU lock cannot disable it.
1616 We still use this lock to block changes
1617 caused by addrconf/ndisc.
1618 */
1619
1620 idev = __in6_dev_get(arg->dev);
1621 if (idev == NULL)
1622 return 0;
1623
1624 /* For administrative MTU increase, there is no way to discover
1625 IPv6 PMTU increase, so PMTU increase should be updated here.
1626 Since RFC 1981 doesn't include administrative MTU increase
1627 update PMTU increase is a MUST. (i.e. jumbo frame)
1628 */
1629 /*
1630 If new MTU is less than route PMTU, this new MTU will be the
1631 lowest MTU in the path, update the route PMTU to reflect PMTU
1632 decreases; if new MTU is greater than route PMTU, and the
1633 old MTU is the lowest MTU in the path, update the route PMTU
1634 to reflect the increase. In this case if the other nodes' MTU
1635 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1636 PMTU discouvery.
1637 */
1638 if (rt->rt6i_dev == arg->dev &&
1639 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1640 (dst_mtu(&rt->u.dst) > arg->mtu ||
1641 (dst_mtu(&rt->u.dst) < arg->mtu &&
1642 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1643 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1644 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1645 return 0;
1646}
1647
1648void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1649{
1650 struct rt6_mtu_change_arg arg;
1651
1652 arg.dev = dev;
1653 arg.mtu = mtu;
1654 read_lock_bh(&rt6_lock);
1655 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1656 read_unlock_bh(&rt6_lock);
1657}
1658
1659static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1660 struct in6_rtmsg *rtmsg)
1661{
1662 memset(rtmsg, 0, sizeof(*rtmsg));
1663
1664 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1665 rtmsg->rtmsg_src_len = r->rtm_src_len;
1666 rtmsg->rtmsg_flags = RTF_UP;
1667 if (r->rtm_type == RTN_UNREACHABLE)
1668 rtmsg->rtmsg_flags |= RTF_REJECT;
1669
1670 if (rta[RTA_GATEWAY-1]) {
1671 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1672 return -EINVAL;
1673 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1674 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1675 }
1676 if (rta[RTA_DST-1]) {
1677 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1678 return -EINVAL;
1679 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1680 }
1681 if (rta[RTA_SRC-1]) {
1682 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1683 return -EINVAL;
1684 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1685 }
1686 if (rta[RTA_OIF-1]) {
1687 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1688 return -EINVAL;
1689 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1690 }
1691 if (rta[RTA_PRIORITY-1]) {
1692 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1693 return -EINVAL;
1694 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1695 }
1696 return 0;
1697}
1698
1699int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1700{
1701 struct rtmsg *r = NLMSG_DATA(nlh);
1702 struct in6_rtmsg rtmsg;
1703
1704 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1705 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001706 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707}
1708
1709int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1710{
1711 struct rtmsg *r = NLMSG_DATA(nlh);
1712 struct in6_rtmsg rtmsg;
1713
1714 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1715 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001716 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717}
1718
/* Per-call state handed from inet6_dump_fib() to rt6_dump_route(). */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback of the request */
};
1724
1725static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001726 struct in6_addr *dst, struct in6_addr *src,
1727 int iif, int type, u32 pid, u32 seq,
1728 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729{
1730 struct rtmsg *rtm;
1731 struct nlmsghdr *nlh;
1732 unsigned char *b = skb->tail;
1733 struct rta_cacheinfo ci;
1734
1735 if (prefix) { /* user wants prefix routes only */
1736 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1737 /* success since this is not a prefix route */
1738 return 1;
1739 }
1740 }
1741
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001742 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 rtm = NLMSG_DATA(nlh);
1744 rtm->rtm_family = AF_INET6;
1745 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1746 rtm->rtm_src_len = rt->rt6i_src.plen;
1747 rtm->rtm_tos = 0;
1748 rtm->rtm_table = RT_TABLE_MAIN;
1749 if (rt->rt6i_flags&RTF_REJECT)
1750 rtm->rtm_type = RTN_UNREACHABLE;
1751 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1752 rtm->rtm_type = RTN_LOCAL;
1753 else
1754 rtm->rtm_type = RTN_UNICAST;
1755 rtm->rtm_flags = 0;
1756 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1757 rtm->rtm_protocol = rt->rt6i_protocol;
1758 if (rt->rt6i_flags&RTF_DYNAMIC)
1759 rtm->rtm_protocol = RTPROT_REDIRECT;
1760 else if (rt->rt6i_flags & RTF_ADDRCONF)
1761 rtm->rtm_protocol = RTPROT_KERNEL;
1762 else if (rt->rt6i_flags&RTF_DEFAULT)
1763 rtm->rtm_protocol = RTPROT_RA;
1764
1765 if (rt->rt6i_flags&RTF_CACHE)
1766 rtm->rtm_flags |= RTM_F_CLONED;
1767
1768 if (dst) {
1769 RTA_PUT(skb, RTA_DST, 16, dst);
1770 rtm->rtm_dst_len = 128;
1771 } else if (rtm->rtm_dst_len)
1772 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1773#ifdef CONFIG_IPV6_SUBTREES
1774 if (src) {
1775 RTA_PUT(skb, RTA_SRC, 16, src);
1776 rtm->rtm_src_len = 128;
1777 } else if (rtm->rtm_src_len)
1778 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1779#endif
1780 if (iif)
1781 RTA_PUT(skb, RTA_IIF, 4, &iif);
1782 else if (dst) {
1783 struct in6_addr saddr_buf;
1784 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1785 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1786 }
1787 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1788 goto rtattr_failure;
1789 if (rt->u.dst.neighbour)
1790 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1791 if (rt->u.dst.dev)
1792 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1793 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1794 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1795 if (rt->rt6i_expires)
1796 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1797 else
1798 ci.rta_expires = 0;
1799 ci.rta_used = rt->u.dst.__use;
1800 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1801 ci.rta_error = rt->u.dst.error;
1802 ci.rta_id = 0;
1803 ci.rta_ts = 0;
1804 ci.rta_tsage = 0;
1805 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1806 nlh->nlmsg_len = skb->tail - b;
1807 return skb->len;
1808
1809nlmsg_failure:
1810rtattr_failure:
1811 skb_trim(skb, b - skb->data);
1812 return -1;
1813}
1814
1815static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1816{
1817 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1818 int prefix;
1819
1820 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1821 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1822 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1823 } else
1824 prefix = 0;
1825
1826 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1827 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001828 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829}
1830
1831static int fib6_dump_node(struct fib6_walker_t *w)
1832{
1833 int res;
1834 struct rt6_info *rt;
1835
1836 for (rt = w->leaf; rt; rt = rt->u.next) {
1837 res = rt6_dump_route(rt, w->args);
1838 if (res < 0) {
1839 /* Frame is full, suspend walking */
1840 w->leaf = rt;
1841 return 1;
1842 }
1843 BUG_TRAP(res!=0);
1844 }
1845 w->leaf = NULL;
1846 return 0;
1847}
1848
1849static void fib6_dump_end(struct netlink_callback *cb)
1850{
1851 struct fib6_walker_t *w = (void*)cb->args[0];
1852
1853 if (w) {
1854 cb->args[0] = 0;
1855 fib6_walker_unlink(w);
1856 kfree(w);
1857 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001858 cb->done = (void*)cb->args[1];
1859 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860}
1861
1862static int fib6_dump_done(struct netlink_callback *cb)
1863{
1864 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001865 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866}
1867
1868int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1869{
1870 struct rt6_rtnl_dump_arg arg;
1871 struct fib6_walker_t *w;
1872 int res;
1873
1874 arg.skb = skb;
1875 arg.cb = cb;
1876
1877 w = (void*)cb->args[0];
1878 if (w == NULL) {
1879 /* New dump:
1880 *
1881 * 1. hook callback destructor.
1882 */
1883 cb->args[1] = (long)cb->done;
1884 cb->done = fib6_dump_done;
1885
1886 /*
1887 * 2. allocate and initialize walker.
1888 */
Ingo Oeser0c600ed2006-03-20 23:01:32 -08001889 w = kzalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 if (w == NULL)
1891 return -ENOMEM;
1892 RT6_TRACE("dump<%p", w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 w->root = &ip6_routing_table;
1894 w->func = fib6_dump_node;
1895 w->args = &arg;
1896 cb->args[0] = (long)w;
1897 read_lock_bh(&rt6_lock);
1898 res = fib6_walk(w);
1899 read_unlock_bh(&rt6_lock);
1900 } else {
1901 w->args = &arg;
1902 read_lock_bh(&rt6_lock);
1903 res = fib6_walk_continue(w);
1904 read_unlock_bh(&rt6_lock);
1905 }
1906#if RT6_DEBUG >= 3
1907 if (res <= 0 && skb->len == 0)
1908 RT6_TRACE("%p>dump end\n", w);
1909#endif
1910 res = res < 0 ? res : skb->len;
1911 /* res < 0 is an error. (really, impossible)
1912 res == 0 means that dump is complete, but skb still can contain data.
1913 res > 0 dump is not complete, but frame is full.
1914 */
1915 /* Destroy walker, if dump of this table is complete. */
1916 if (res <= 0)
1917 fib6_dump_end(cb);
1918 return res;
1919}
1920
1921int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1922{
1923 struct rtattr **rta = arg;
1924 int iif = 0;
1925 int err = -ENOBUFS;
1926 struct sk_buff *skb;
1927 struct flowi fl;
1928 struct rt6_info *rt;
1929
1930 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1931 if (skb == NULL)
1932 goto out;
1933
1934 /* Reserve room for dummy headers, this skb can pass
1935 through good chunk of routing engine.
1936 */
1937 skb->mac.raw = skb->data;
1938 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1939
1940 memset(&fl, 0, sizeof(fl));
1941 if (rta[RTA_SRC-1])
1942 ipv6_addr_copy(&fl.fl6_src,
1943 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1944 if (rta[RTA_DST-1])
1945 ipv6_addr_copy(&fl.fl6_dst,
1946 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1947
1948 if (rta[RTA_IIF-1])
1949 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1950
1951 if (iif) {
1952 struct net_device *dev;
1953 dev = __dev_get_by_index(iif);
1954 if (!dev) {
1955 err = -ENODEV;
1956 goto out_free;
1957 }
1958 }
1959
1960 fl.oif = 0;
1961 if (rta[RTA_OIF-1])
1962 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1963
1964 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1965
1966 skb->dst = &rt->u.dst;
1967
1968 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1969 err = rt6_fill_node(skb, rt,
1970 &fl.fl6_dst, &fl.fl6_src,
1971 iif,
1972 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001973 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974 if (err < 0) {
1975 err = -EMSGSIZE;
1976 goto out_free;
1977 }
1978
1979 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1980 if (err > 0)
1981 err = 0;
1982out:
1983 return err;
1984out_free:
1985 kfree_skb(skb);
1986 goto out;
1987}
1988
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001989void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1990 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991{
1992 struct sk_buff *skb;
1993 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001994 u32 pid = current->pid;
1995 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001997 if (req)
1998 pid = req->pid;
1999 if (nlh)
2000 seq = nlh->nlmsg_seq;
2001
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 skb = alloc_skb(size, gfp_any());
2003 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07002004 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 return;
2006 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002007 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002009 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 return;
2011 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002012 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2013 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014}
2015
2016/*
2017 * /proc
2018 */
2019
2020#ifdef CONFIG_PROC_FS
2021
/*
 * Nominal length in bytes of one /proc/net/ipv6_route record as emitted by
 * rt6_info_route():
 *   32 + 4   destination address in hex, then " %02x " prefix length
 *   32 + 4   source address in hex, then " %02x " prefix length
 *   32       next-hop address in hex
 *   40 + 5   five 8-character columns (four %08x values and the %8s device
 *            name), each preceded by one space
 *   1        trailing newline
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2023
/*
 * Cursor shared between rt6_proc_info() and the per-route callback
 * rt6_info_route() while the routing table is dumped into a /proc buffer.
 */
struct rt6_proc_arg {
	char *buffer;	/* output buffer supplied by the proc read */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* records passed over so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
2032
2033static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2034{
2035 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2036 int i;
2037
2038 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2039 arg->skip++;
2040 return 0;
2041 }
2042
2043 if (arg->len >= arg->length)
2044 return 0;
2045
2046 for (i=0; i<16; i++) {
2047 sprintf(arg->buffer + arg->len, "%02x",
2048 rt->rt6i_dst.addr.s6_addr[i]);
2049 arg->len += 2;
2050 }
2051 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2052 rt->rt6i_dst.plen);
2053
2054#ifdef CONFIG_IPV6_SUBTREES
2055 for (i=0; i<16; i++) {
2056 sprintf(arg->buffer + arg->len, "%02x",
2057 rt->rt6i_src.addr.s6_addr[i]);
2058 arg->len += 2;
2059 }
2060 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2061 rt->rt6i_src.plen);
2062#else
2063 sprintf(arg->buffer + arg->len,
2064 "00000000000000000000000000000000 00 ");
2065 arg->len += 36;
2066#endif
2067
2068 if (rt->rt6i_nexthop) {
2069 for (i=0; i<16; i++) {
2070 sprintf(arg->buffer + arg->len, "%02x",
2071 rt->rt6i_nexthop->primary_key[i]);
2072 arg->len += 2;
2073 }
2074 } else {
2075 sprintf(arg->buffer + arg->len,
2076 "00000000000000000000000000000000");
2077 arg->len += 32;
2078 }
2079 arg->len += sprintf(arg->buffer + arg->len,
2080 " %08x %08x %08x %08x %8s\n",
2081 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2082 rt->u.dst.__use, rt->rt6i_flags,
2083 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2084 return 0;
2085}
2086
2087static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2088{
2089 struct rt6_proc_arg arg;
2090 arg.buffer = buffer;
2091 arg.offset = offset;
2092 arg.length = length;
2093 arg.skip = 0;
2094 arg.len = 0;
2095
2096 read_lock_bh(&rt6_lock);
2097 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2098 read_unlock_bh(&rt6_lock);
2099
2100 *start = buffer;
2101 if (offset)
2102 *start += offset % RT6_INFO_LEN;
2103
2104 arg.len -= offset % RT6_INFO_LEN;
2105
2106 if (arg.len > length)
2107 arg.len = length;
2108 if (arg.len < 0)
2109 arg.len = 0;
2110
2111 return arg.len;
2112}
2113
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2115{
2116 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2117 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2118 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2119 rt6_stats.fib_rt_cache,
2120 atomic_read(&ip6_dst_ops.entries),
2121 rt6_stats.fib_discarded_routes);
2122
2123 return 0;
2124}
2125
2126static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2127{
2128 return single_open(file, rt6_stats_seq_show, NULL);
2129}
2130
2131static struct file_operations rt6_stats_seq_fops = {
2132 .owner = THIS_MODULE,
2133 .open = rt6_stats_seq_open,
2134 .read = seq_read,
2135 .llseek = seq_lseek,
2136 .release = single_release,
2137};
2138#endif /* CONFIG_PROC_FS */
2139
2140#ifdef CONFIG_SYSCTL
2141
2142static int flush_delay;
2143
2144static
2145int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2146 void __user *buffer, size_t *lenp, loff_t *ppos)
2147{
2148 if (write) {
2149 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2150 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2151 return 0;
2152 } else
2153 return -EINVAL;
2154}
2155
2156ctl_table ipv6_route_table[] = {
2157 {
2158 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2159 .procname = "flush",
2160 .data = &flush_delay,
2161 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002162 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163 .proc_handler = &ipv6_sysctl_rtcache_flush
2164 },
2165 {
2166 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2167 .procname = "gc_thresh",
2168 .data = &ip6_dst_ops.gc_thresh,
2169 .maxlen = sizeof(int),
2170 .mode = 0644,
2171 .proc_handler = &proc_dointvec,
2172 },
2173 {
2174 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2175 .procname = "max_size",
2176 .data = &ip6_rt_max_size,
2177 .maxlen = sizeof(int),
2178 .mode = 0644,
2179 .proc_handler = &proc_dointvec,
2180 },
2181 {
2182 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2183 .procname = "gc_min_interval",
2184 .data = &ip6_rt_gc_min_interval,
2185 .maxlen = sizeof(int),
2186 .mode = 0644,
2187 .proc_handler = &proc_dointvec_jiffies,
2188 .strategy = &sysctl_jiffies,
2189 },
2190 {
2191 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2192 .procname = "gc_timeout",
2193 .data = &ip6_rt_gc_timeout,
2194 .maxlen = sizeof(int),
2195 .mode = 0644,
2196 .proc_handler = &proc_dointvec_jiffies,
2197 .strategy = &sysctl_jiffies,
2198 },
2199 {
2200 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2201 .procname = "gc_interval",
2202 .data = &ip6_rt_gc_interval,
2203 .maxlen = sizeof(int),
2204 .mode = 0644,
2205 .proc_handler = &proc_dointvec_jiffies,
2206 .strategy = &sysctl_jiffies,
2207 },
2208 {
2209 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2210 .procname = "gc_elasticity",
2211 .data = &ip6_rt_gc_elasticity,
2212 .maxlen = sizeof(int),
2213 .mode = 0644,
2214 .proc_handler = &proc_dointvec_jiffies,
2215 .strategy = &sysctl_jiffies,
2216 },
2217 {
2218 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2219 .procname = "mtu_expires",
2220 .data = &ip6_rt_mtu_expires,
2221 .maxlen = sizeof(int),
2222 .mode = 0644,
2223 .proc_handler = &proc_dointvec_jiffies,
2224 .strategy = &sysctl_jiffies,
2225 },
2226 {
2227 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2228 .procname = "min_adv_mss",
2229 .data = &ip6_rt_min_advmss,
2230 .maxlen = sizeof(int),
2231 .mode = 0644,
2232 .proc_handler = &proc_dointvec_jiffies,
2233 .strategy = &sysctl_jiffies,
2234 },
2235 {
2236 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2237 .procname = "gc_min_interval_ms",
2238 .data = &ip6_rt_gc_min_interval,
2239 .maxlen = sizeof(int),
2240 .mode = 0644,
2241 .proc_handler = &proc_dointvec_ms_jiffies,
2242 .strategy = &sysctl_ms_jiffies,
2243 },
2244 { .ctl_name = 0 }
2245};
2246
2247#endif
2248
2249void __init ip6_route_init(void)
2250{
2251 struct proc_dir_entry *p;
2252
2253 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2254 sizeof(struct rt6_info),
2255 0, SLAB_HWCACHE_ALIGN,
2256 NULL, NULL);
2257 if (!ip6_dst_ops.kmem_cachep)
2258 panic("cannot create ip6_dst_cache");
2259
2260 fib6_init();
2261#ifdef CONFIG_PROC_FS
2262 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2263 if (p)
2264 p->owner = THIS_MODULE;
2265
2266 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2267#endif
2268#ifdef CONFIG_XFRM
2269 xfrm6_init();
2270#endif
2271}
2272
2273void ip6_route_cleanup(void)
2274{
2275#ifdef CONFIG_PROC_FS
2276 proc_net_remove("ipv6_route");
2277 proc_net_remove("rt6_stats");
2278#endif
2279#ifdef CONFIG_XFRM
2280 xfrm6_fini();
2281#endif
2282 rt6_ifdown(NULL);
2283 fib6_gc_cleanup();
2284 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2285}