blob: 4b163711f3a86382609d925b62e5d3e52cc1b4b7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
/*
 * Compile-time debug level for this file.
 *
 * With RT6_DEBUG >= 3, RDBG() and RT6_TRACE() expand to printk() calls
 * (RDBG takes a fully parenthesised printk argument list, RT6_TRACE takes
 * a format string plus arguments and prepends KERN_DEBUG).  At any lower
 * level both expand to nothing, so their arguments are not evaluated.
 */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
/*
 * CLONE_OFFLINK_ROUTE: when non-zero, ip6_route_input()/ip6_route_output()
 * call rt6_alloc_clone() for routes that already have a nexthop (or are
 * RTF_NONEXTHOP); when zero such routes are returned as they are.
 */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080075#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
/*
 * Bit flags for the "strict" argument of rt6_select()/rt6_score_route():
 *   RT6_SELECT_F_IFACE     - reject routes whose device does not match oif
 *   RT6_SELECT_F_REACHABLE - reject routes for which rt6_check_neigh()
 *                            returns 0
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -080077#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
/*
 * Route cache tunables.  Time values are in jiffies.
 *
 *   ip6_rt_max_size        - entry count above which ip6_dst_gc() always
 *                            runs and reports pressure to its caller
 *   ip6_rt_gc_min_interval - minimum spacing between two ip6_dst_gc() runs
 *                            while the cache is within ip6_rt_max_size
 *   ip6_rt_gc_timeout      - ip6_dst_gc() resets its expiry to half of this
 *                            once the cache drops below gc_thresh
 *   ip6_rt_gc_elasticity   - right-shift used by ip6_dst_gc() to decay its
 *                            expiry on every call
 *   ip6_rt_min_advmss      - floor applied by ipv6_advmss()
 *
 * NOTE(review): ip6_rt_gc_interval (non-static) and ip6_rt_mtu_expires are
 * not referenced in the part of the file visible here; presumably used
 * elsewhere -- confirm before changing.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
/*
 * dst operations table for IPv6 routes.  ip6_dst_alloc() passes it to
 * dst_alloc(); it wires the generic dst callbacks (gc, check, destroy,
 * ifdown, negative_advice, link_failure, update_pmtu) to the handlers
 * defined in this file.  gc_thresh is compared against the live entry
 * count in ip6_dst_gc().
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
/*
 * Statically allocated "no route" entry.
 *
 * rt6_select() and rt6_device_match() return it when nothing matches, and
 * it is the initial leaf of ip6_routing_table.  It is a reject route bound
 * to the loopback device: input/output discard the packet and the dst
 * error is -ENETUNREACH.  Both reference counters start at 1, and it
 * carries the largest possible metric.
 */
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
/*
 * Root node of the IPv6 FIB.  Every lookup and insertion in this file
 * starts here (fib6_lookup()/fib6_add()).  Its leaf is ip6_null_entry and
 * it is flagged as a root node that carries route information.
 */
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
148/* Protects all the ip6 fib */
149
/*
 * Taken for reading (with BHs disabled) around lookups in rt6_lookup(),
 * ip6_route_input() and ip6_route_output(); taken for writing around
 * fib6_add() in ip6_ins_rt() and around the ndisc_dst_gc_list update in
 * ndisc_dst_alloc().
 */
150DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * @rt: route whose nexthop should be probed; may be NULL, in which case
 *      nothing happens.
 *
 * Probes MUST be rate-limited (to no more than one per minute according
 * to the original note here); the spacing actually enforced is the
 * per-device cnf.rtr_probe_interval, measured from neigh->updated.
 *
 * The timestamp check and its update are done under the neighbour's
 * write lock so that two concurrent callers cannot both pass the check
 * and both send a probe.  The lock is dropped before the solicitation is
 * sent.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800266 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277}
278
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800279static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700283 if (rt->rt6i_flags & RTF_NONEXTHOP ||
284 !(rt->rt6i_flags & RTF_GATEWAY))
285 m = 1;
286 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800287 read_lock_bh(&neigh->lock);
288 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700289 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800290 read_unlock_bh(&neigh->lock);
291 }
292 return m;
293}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800295static int rt6_score_route(struct rt6_info *rt, int oif,
296 int strict)
297{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700298 int m, n;
299
300 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800301 if (!m && (strict & RT6_SELECT_F_IFACE))
302 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800303#ifdef CONFIG_IPV6_ROUTER_PREF
304 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
305#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700306 n = rt6_check_neigh(rt);
307 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800308 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700309 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310 return -1;
311 return m;
312}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
/*
 * Select the best route among the entries at the head of a leaf list
 * that share the metric of the first entry.
 *
 * @head:   address of the list head (e.g. &fn->leaf); may be rewritten
 *          by the round-robin step below
 * @oif:    requested interface index, passed on to rt6_score_route()
 * @strict: RT6_SELECT_F_* flags, passed on to rt6_score_route()
 *
 * Expired entries are skipped.  Each remaining entry is scored with
 * rt6_score_route(); the highest score wins and the first entry wins a
 * tie.  Every scored entry that does not end up as the winner is handed
 * to rt6_probe() (which is a no-op without CONFIG_IPV6_ROUTER_PREF).
 *
 * If nothing scored >= 0, RT6_SELECT_F_REACHABLE was requested and more
 * than one unexpired entry was seen, the first entry is moved behind the
 * last one visited so that the next call starts from a different entry.
 * Callers in this file invoke rt6_select() while holding rt6_lock only
 * for reading, so that relinking is serialised by a function-local
 * spinlock.
 *
 * Returns the winning route, or &ip6_null_entry when there is none.
 * No reference is taken on the returned route.
 *
 * NOTE(review): *head is dereferenced unconditionally for rt0 and
 * rt0->rt6i_metric, so the "head ? *head : NULL" in the trace call does
 * not actually make a NULL head or an empty list safe.
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800314static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
315 int strict)
316{
317 struct rt6_info *match = NULL, *last = NULL;
318 struct rt6_info *rt, *rt0 = *head;
319 u32 metric;
320 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
323 __FUNCTION__, head, head ? *head : NULL, oif);
324
325 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700326 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 rt = rt->u.next) {
328 int m;
329
330 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 continue;
332
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 m = rt6_score_route(rt, oif, strict);
336 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800339 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800340 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800343 } else {
344 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 }
346 }
347
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 if (!match &&
349 (strict & RT6_SELECT_F_REACHABLE) &&
350 last && last != rt0) {
351 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700352 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700353 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354 *head = rt0->u.next;
355 rt0->u.next = last->u.next;
356 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700357 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 }
359
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800360 RT6_TRACE("%s() => %p, score=%d\n",
361 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364}
365
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800366#ifdef CONFIG_IPV6_ROUTE_INFO
367int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
368 struct in6_addr *gwaddr)
369{
370 struct route_info *rinfo = (struct route_info *) opt;
371 struct in6_addr prefix_buf, *prefix;
372 unsigned int pref;
373 u32 lifetime;
374 struct rt6_info *rt;
375
376 if (len < sizeof(struct route_info)) {
377 return -EINVAL;
378 }
379
380 /* Sanity check for prefix_len and length */
381 if (rinfo->length > 3) {
382 return -EINVAL;
383 } else if (rinfo->prefix_len > 128) {
384 return -EINVAL;
385 } else if (rinfo->prefix_len > 64) {
386 if (rinfo->length < 2) {
387 return -EINVAL;
388 }
389 } else if (rinfo->prefix_len > 0) {
390 if (rinfo->length < 1) {
391 return -EINVAL;
392 }
393 }
394
395 pref = rinfo->route_pref;
396 if (pref == ICMPV6_ROUTER_PREF_INVALID)
397 pref = ICMPV6_ROUTER_PREF_MEDIUM;
398
399 lifetime = htonl(rinfo->lifetime);
400 if (lifetime == 0xffffffff) {
401 /* infinity */
402 } else if (lifetime > 0x7fffffff/HZ) {
403 /* Avoid arithmetic overflow */
404 lifetime = 0x7fffffff/HZ - 1;
405 }
406
407 if (rinfo->length == 3)
408 prefix = (struct in6_addr *)rinfo->prefix;
409 else {
410 /* this function is safe */
411 ipv6_addr_prefix(&prefix_buf,
412 (struct in6_addr *)rinfo->prefix,
413 rinfo->prefix_len);
414 prefix = &prefix_buf;
415 }
416
417 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
418
419 if (rt && !lifetime) {
420 ip6_del_rt(rt, NULL, NULL, NULL);
421 rt = NULL;
422 }
423
424 if (!rt && lifetime)
425 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
426 pref);
427 else if (rt)
428 rt->rt6i_flags = RTF_ROUTEINFO |
429 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
430
431 if (rt) {
432 if (lifetime == 0xffffffff) {
433 rt->rt6i_flags &= ~RTF_EXPIRES;
434 } else {
435 rt->rt6i_expires = jiffies + HZ * lifetime;
436 rt->rt6i_flags |= RTF_EXPIRES;
437 }
438 dst_release(&rt->u.dst);
439 }
440 return 0;
441}
442#endif
443
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
445 int oif, int strict)
446{
447 struct fib6_node *fn;
448 struct rt6_info *rt;
449
450 read_lock_bh(&rt6_lock);
451 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
452 rt = rt6_device_match(fn->leaf, oif, strict);
453 dst_hold(&rt->u.dst);
454 rt->u.dst.__use++;
455 read_unlock_bh(&rt6_lock);
456
457 rt->u.dst.lastuse = jiffies;
458 if (rt->u.dst.error == 0)
459 return rt;
460 dst_release(&rt->u.dst);
461 return NULL;
462}
463
464/* ip6_ins_rt is called with FREE rt6_lock.
465 It takes new route entry, the addition fails by any reason the
466 route is freed. In any case, if caller does not hold it, it may
467 be destroyed.
468 */
469
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700470int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
471 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472{
473 int err;
474
475 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700476 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 write_unlock_bh(&rt6_lock);
478
479 return err;
480}
481
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800482static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
483 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 struct rt6_info *rt;
486
487 /*
488 * Clone the route.
489 */
490
491 rt = ip6_rt_copy(ort);
492
493 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900494 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
495 if (rt->rt6i_dst.plen != 128 &&
496 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
497 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900499 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900501 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 rt->rt6i_dst.plen = 128;
503 rt->rt6i_flags |= RTF_CACHE;
504 rt->u.dst.flags |= DST_HOST;
505
506#ifdef CONFIG_IPV6_SUBTREES
507 if (rt->rt6i_src.plen && saddr) {
508 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
509 rt->rt6i_src.plen = 128;
510 }
511#endif
512
513 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
514
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800515 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800517 return rt;
518}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800520static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
521{
522 struct rt6_info *rt = ip6_rt_copy(ort);
523 if (rt) {
524 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
525 rt->rt6i_dst.plen = 128;
526 rt->rt6i_flags |= RTF_CACHE;
527 if (rt->rt6i_flags & RTF_REJECT)
528 rt->u.dst.error = ort->u.dst.error;
529 rt->u.dst.flags |= DST_HOST;
530 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
531 }
532 return rt;
533}
534
/*
 * Walk back up the FIB when the selected route is ip6_null_entry.
 *
 * Not a self-contained macro: it relies on the enclosing function
 * providing the locals "rt" and "fn" and the labels "out" and "restart".
 * Climbing from fn towards the root, it jumps to "restart" at the first
 * ancestor that carries route information and to "out" when the root is
 * reached.  If rt is a real route, or the parents run out, control simply
 * falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
545
546
/*
 * Resolve the route for a received packet and store it in skb->dst.
 *
 * The lookup key is the packet's destination/source address and the
 * index of the receiving device.  Multicast and link-local destinations
 * force RT6_SELECT_F_IFACE.
 *
 * Flow:
 *  - Under rt6_lock (read), find the FIB node and let rt6_select() pick a
 *    route, first with RT6_SELECT_F_REACHABLE set.  BACKTRACK() climbs to
 *    parent nodes while the pick is ip6_null_entry.
 *  - If the pick is ip6_null_entry or already an RTF_CACHE entry, control
 *    goes to "out".  The first time "out" is reached the whole lookup is
 *    repeated without RT6_SELECT_F_REACHABLE; the second time the route
 *    is held and used as is.
 *  - Otherwise the lock is dropped and, for a route with no nexthop that
 *    is not RTF_NONEXTHOP, a /128 copy is made with rt6_alloc_cow() and
 *    inserted with ip6_ins_rt().  Other routes are used directly unless
 *    CLONE_OFFLINK_ROUTE is enabled, in which case rt6_alloc_clone() is
 *    used.
 *  - If the copy could not be allocated ip6_null_entry stands in for it;
 *    in that case, or if insertion fails, the lookup is retried, up to
 *    three attempts in total.
 *
 * On return skb->dst points at a route on which a reference is held, and
 * that route's lastuse/__use statistics have been updated.
 */
547void ip6_route_input(struct sk_buff *skb)
548{
549 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800550 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 int strict;
552 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800553 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800554 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800556 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
558relookup:
559 read_lock_bh(&rt6_lock);
560
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800561restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
563 &skb->nh.ipv6h->saddr);
564
565restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800566 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800568 if (rt == &ip6_null_entry ||
569 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800570 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800572 dst_hold(&rt->u.dst);
573 read_unlock_bh(&rt6_lock);
574
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800575 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
576 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
577 else {
578#if CLONE_OFFLINK_ROUTE
579 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
580#else
581 goto out2;
582#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800585 dst_release(&rt->u.dst);
586 rt = nrt ? : &ip6_null_entry;
587
588 dst_hold(&rt->u.dst);
589 if (nrt) {
590 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
591 if (!err)
592 goto out2;
593 }
594
595 if (--attempts <= 0)
596 goto out2;
597
598 /*
599 * Race condition! In the gap, when rt6_lock was
600 * released someone could insert this route. Relookup.
601 */
602 dst_release(&rt->u.dst);
603 goto relookup;
604
605out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800606 if (reachable) {
607 reachable = 0;
608 goto restart_2;
609 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800610 dst_hold(&rt->u.dst);
611 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612out2:
613 rt->u.dst.lastuse = jiffies;
614 rt->u.dst.__use++;
615 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800616 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617}
618
/*
 * Resolve the route for a locally generated flow.
 *
 * @sk: not referenced in the body
 * @fl: flow description; fl6_dst, fl6_src and oif are the lookup key
 *
 * Multicast and link-local destinations force RT6_SELECT_F_IFACE.
 *
 * Flow:
 *  - Under rt6_lock (read), find the FIB node and let rt6_select() pick a
 *    route, first with RT6_SELECT_F_REACHABLE set.  BACKTRACK() climbs to
 *    parent nodes while the pick is ip6_null_entry.
 *  - If the pick is ip6_null_entry or already an RTF_CACHE entry, control
 *    goes to "out".  The first time "out" is reached the whole lookup is
 *    repeated without RT6_SELECT_F_REACHABLE; the second time the route
 *    is held and used as is.
 *  - Otherwise the lock is dropped and, for a route with no nexthop that
 *    is not RTF_NONEXTHOP, a /128 copy is made with rt6_alloc_cow() and
 *    inserted with ip6_ins_rt().  Other routes are used directly unless
 *    CLONE_OFFLINK_ROUTE is enabled, in which case rt6_alloc_clone() is
 *    used.
 *  - If the copy could not be allocated ip6_null_entry stands in for it;
 *    in that case, or if insertion fails, the lookup is retried, up to
 *    three attempts in total.
 *
 * Returns the dst of the chosen route with a reference held; this may be
 * ip6_null_entry's dst when no route was found.  The route's
 * lastuse/__use statistics are updated.
 */
619struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
620{
621 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800622 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 int strict;
624 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800625 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800626 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800628 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
630relookup:
631 read_lock_bh(&rt6_lock);
632
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800633restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
635
636restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800637 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800638 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800639 if (rt == &ip6_null_entry ||
640 rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800643 dst_hold(&rt->u.dst);
644 read_unlock_bh(&rt6_lock);
645
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800646 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800647 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800648 else {
649#if CLONE_OFFLINK_ROUTE
650 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
651#else
652 goto out2;
653#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800655
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800656 dst_release(&rt->u.dst);
657 rt = nrt ? : &ip6_null_entry;
658
659 dst_hold(&rt->u.dst);
660 if (nrt) {
661 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
662 if (!err)
663 goto out2;
664 }
665
666 if (--attempts <= 0)
667 goto out2;
668
669 /*
670 * Race condition! In the gap, when rt6_lock was
671 * released someone could insert this route. Relookup.
672 */
673 dst_release(&rt->u.dst);
674 goto relookup;
675
676out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800677 if (reachable) {
678 reachable = 0;
679 goto restart_2;
680 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800681 dst_hold(&rt->u.dst);
682 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683out2:
684 rt->u.dst.lastuse = jiffies;
685 rt->u.dst.__use++;
686 return &rt->u.dst;
687}
688
689
690/*
691 * Destination cache support functions
692 */
693
694static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
695{
696 struct rt6_info *rt;
697
698 rt = (struct rt6_info *) dst;
699
700 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
701 return dst;
702
703 return NULL;
704}
705
706static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
707{
708 struct rt6_info *rt = (struct rt6_info *) dst;
709
710 if (rt) {
711 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700712 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 else
714 dst_release(dst);
715 }
716 return NULL;
717}
718
719static void ip6_link_failure(struct sk_buff *skb)
720{
721 struct rt6_info *rt;
722
723 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
724
725 rt = (struct rt6_info *) skb->dst;
726 if (rt) {
727 if (rt->rt6i_flags&RTF_CACHE) {
728 dst_set_expires(&rt->u.dst, 0);
729 rt->rt6i_flags |= RTF_EXPIRES;
730 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
731 rt->rt6i_node->fn_sernum = -1;
732 }
733}
734
735static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
736{
737 struct rt6_info *rt6 = (struct rt6_info*)dst;
738
739 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
740 rt6->rt6i_flags |= RTF_MODIFIED;
741 if (mtu < IPV6_MIN_MTU) {
742 mtu = IPV6_MIN_MTU;
743 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
744 }
745 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700746 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 }
748}
749
750/* Protected by rt6_lock. */
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); entries whose reference count has dropped to zero
 * are unlinked and freed by ndisc_dst_gc().
 */
751static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; defined further down, used by ndisc_dst_alloc(). */
752static int ipv6_get_mtu(struct net_device *dev);
753
754static inline unsigned int ipv6_advmss(unsigned int mtu)
755{
756 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
757
758 if (mtu < ip6_rt_min_advmss)
759 mtu = ip6_rt_min_advmss;
760
761 /*
762 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
763 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
764 * IPV6_MAXPLEN is also valid and means: "any MSS,
765 * rely only on pmtu discovery"
766 */
767 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
768 mtu = IPV6_MAXPLEN;
769 return mtu;
770}
771
772struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
773 struct neighbour *neigh,
774 struct in6_addr *addr,
775 int (*output)(struct sk_buff *))
776{
777 struct rt6_info *rt;
778 struct inet6_dev *idev = in6_dev_get(dev);
779
780 if (unlikely(idev == NULL))
781 return NULL;
782
783 rt = ip6_dst_alloc();
784 if (unlikely(rt == NULL)) {
785 in6_dev_put(idev);
786 goto out;
787 }
788
789 dev_hold(dev);
790 if (neigh)
791 neigh_hold(neigh);
792 else
793 neigh = ndisc_get_neigh(dev, addr);
794
795 rt->rt6i_dev = dev;
796 rt->rt6i_idev = idev;
797 rt->rt6i_nexthop = neigh;
798 atomic_set(&rt->u.dst.__refcnt, 1);
799 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
800 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
801 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
802 rt->u.dst.output = output;
803
804#if 0 /* there's no chance to use these for ndisc */
805 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
806 ? DST_HOST
807 : 0;
808 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
809 rt->rt6i_dst.plen = 128;
810#endif
811
812 write_lock_bh(&rt6_lock);
813 rt->u.dst.next = ndisc_dst_gc_list;
814 ndisc_dst_gc_list = &rt->u.dst;
815 write_unlock_bh(&rt6_lock);
816
817 fib6_force_start_gc();
818
819out:
820 return (struct dst_entry *)rt;
821}
822
823int ndisc_dst_gc(int *more)
824{
825 struct dst_entry *dst, *next, **pprev;
826 int freed;
827
828 next = NULL;
829 pprev = &ndisc_dst_gc_list;
830 freed = 0;
831 while ((dst = *pprev) != NULL) {
832 if (!atomic_read(&dst->__refcnt)) {
833 *pprev = dst->next;
834 dst_free(dst);
835 freed++;
836 } else {
837 pprev = &dst->next;
838 (*more)++;
839 }
840 }
841
842 return freed;
843}
844
845static int ip6_dst_gc(void)
846{
847 static unsigned expire = 30*HZ;
848 static unsigned long last_gc;
849 unsigned long now = jiffies;
850
851 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
852 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
853 goto out;
854
855 expire++;
856 fib6_run_gc(expire);
857 last_gc = now;
858 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
859 expire = ip6_rt_gc_timeout>>1;
860
861out:
862 expire -= expire>>ip6_rt_gc_elasticity;
863 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
864}
865
866/* Clean host part of a prefix. Not necessary in radix tree,
867 but results in cleaner routing tables.
868
869 Remove it only when all the things will work!
870 */
871
872static int ipv6_get_mtu(struct net_device *dev)
873{
874 int mtu = IPV6_MIN_MTU;
875 struct inet6_dev *idev;
876
877 idev = in6_dev_get(dev);
878 if (idev) {
879 mtu = idev->cnf.mtu6;
880 in6_dev_put(idev);
881 }
882 return mtu;
883}
884
885int ipv6_get_hoplimit(struct net_device *dev)
886{
887 int hoplimit = ipv6_devconf.hop_limit;
888 struct inet6_dev *idev;
889
890 idev = in6_dev_get(dev);
891 if (idev) {
892 hoplimit = idev->cnf.hop_limit;
893 in6_dev_put(idev);
894 }
895 return hoplimit;
896}
897
898/*
899 *
900 */
901
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700902int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
903 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904{
905 int err;
906 struct rtmsg *r;
907 struct rtattr **rta;
908 struct rt6_info *rt = NULL;
909 struct net_device *dev = NULL;
910 struct inet6_dev *idev = NULL;
911 int addr_type;
912
913 rta = (struct rtattr **) _rtattr;
914
915 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
916 return -EINVAL;
917#ifndef CONFIG_IPV6_SUBTREES
918 if (rtmsg->rtmsg_src_len)
919 return -EINVAL;
920#endif
921 if (rtmsg->rtmsg_ifindex) {
922 err = -ENODEV;
923 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
924 if (!dev)
925 goto out;
926 idev = in6_dev_get(dev);
927 if (!idev)
928 goto out;
929 }
930
931 if (rtmsg->rtmsg_metric == 0)
932 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
933
934 rt = ip6_dst_alloc();
935
936 if (rt == NULL) {
937 err = -ENOMEM;
938 goto out;
939 }
940
941 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800942 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 if (nlh && (r = NLMSG_DATA(nlh))) {
944 rt->rt6i_protocol = r->rtm_protocol;
945 } else {
946 rt->rt6i_protocol = RTPROT_BOOT;
947 }
948
949 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
950
951 if (addr_type & IPV6_ADDR_MULTICAST)
952 rt->u.dst.input = ip6_mc_input;
953 else
954 rt->u.dst.input = ip6_forward;
955
956 rt->u.dst.output = ip6_output;
957
958 ipv6_addr_prefix(&rt->rt6i_dst.addr,
959 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
960 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
961 if (rt->rt6i_dst.plen == 128)
962 rt->u.dst.flags = DST_HOST;
963
964#ifdef CONFIG_IPV6_SUBTREES
965 ipv6_addr_prefix(&rt->rt6i_src.addr,
966 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
967 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
968#endif
969
970 rt->rt6i_metric = rtmsg->rtmsg_metric;
971
972 /* We cannot add true routes via loopback here,
973 they would result in kernel looping; promote them to reject routes
974 */
975 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
976 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
977 /* hold loopback dev/idev if we haven't done so. */
978 if (dev != &loopback_dev) {
979 if (dev) {
980 dev_put(dev);
981 in6_dev_put(idev);
982 }
983 dev = &loopback_dev;
984 dev_hold(dev);
985 idev = in6_dev_get(dev);
986 if (!idev) {
987 err = -ENODEV;
988 goto out;
989 }
990 }
991 rt->u.dst.output = ip6_pkt_discard_out;
992 rt->u.dst.input = ip6_pkt_discard;
993 rt->u.dst.error = -ENETUNREACH;
994 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
995 goto install_route;
996 }
997
998 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
999 struct in6_addr *gw_addr;
1000 int gwa_type;
1001
1002 gw_addr = &rtmsg->rtmsg_gateway;
1003 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1004 gwa_type = ipv6_addr_type(gw_addr);
1005
1006 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1007 struct rt6_info *grt;
1008
1009 /* IPv6 strictly inhibits using not link-local
1010 addresses as nexthop address.
1011 Otherwise, router will not able to send redirects.
1012 It is very good, but in some (rare!) circumstances
1013 (SIT, PtP, NBMA NOARP links) it is handy to allow
1014 some exceptions. --ANK
1015 */
1016 err = -EINVAL;
1017 if (!(gwa_type&IPV6_ADDR_UNICAST))
1018 goto out;
1019
1020 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1021
1022 err = -EHOSTUNREACH;
1023 if (grt == NULL)
1024 goto out;
1025 if (dev) {
1026 if (dev != grt->rt6i_dev) {
1027 dst_release(&grt->u.dst);
1028 goto out;
1029 }
1030 } else {
1031 dev = grt->rt6i_dev;
1032 idev = grt->rt6i_idev;
1033 dev_hold(dev);
1034 in6_dev_hold(grt->rt6i_idev);
1035 }
1036 if (!(grt->rt6i_flags&RTF_GATEWAY))
1037 err = 0;
1038 dst_release(&grt->u.dst);
1039
1040 if (err)
1041 goto out;
1042 }
1043 err = -EINVAL;
1044 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1045 goto out;
1046 }
1047
1048 err = -ENODEV;
1049 if (dev == NULL)
1050 goto out;
1051
1052 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1053 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1054 if (IS_ERR(rt->rt6i_nexthop)) {
1055 err = PTR_ERR(rt->rt6i_nexthop);
1056 rt->rt6i_nexthop = NULL;
1057 goto out;
1058 }
1059 }
1060
1061 rt->rt6i_flags = rtmsg->rtmsg_flags;
1062
1063install_route:
1064 if (rta && rta[RTA_METRICS-1]) {
1065 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1066 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1067
1068 while (RTA_OK(attr, attrlen)) {
1069 unsigned flavor = attr->rta_type;
1070 if (flavor) {
1071 if (flavor > RTAX_MAX) {
1072 err = -EINVAL;
1073 goto out;
1074 }
1075 rt->u.dst.metrics[flavor-1] =
1076 *(u32 *)RTA_DATA(attr);
1077 }
1078 attr = RTA_NEXT(attr, attrlen);
1079 }
1080 }
1081
1082 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1083 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1084 if (!rt->u.dst.metrics[RTAX_MTU-1])
1085 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1086 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1087 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1088 rt->u.dst.dev = dev;
1089 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001090 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091
1092out:
1093 if (dev)
1094 dev_put(dev);
1095 if (idev)
1096 in6_dev_put(idev);
1097 if (rt)
1098 dst_free((struct dst_entry *) rt);
1099 return err;
1100}
1101
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001102int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103{
1104 int err;
1105
1106 write_lock_bh(&rt6_lock);
1107
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001108 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 dst_release(&rt->u.dst);
1110
1111 write_unlock_bh(&rt6_lock);
1112
1113 return err;
1114}
1115
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001116static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117{
1118 struct fib6_node *fn;
1119 struct rt6_info *rt;
1120 int err = -ESRCH;
1121
1122 read_lock_bh(&rt6_lock);
1123
1124 fn = fib6_locate(&ip6_routing_table,
1125 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1126 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1127
1128 if (fn) {
1129 for (rt = fn->leaf; rt; rt = rt->u.next) {
1130 if (rtmsg->rtmsg_ifindex &&
1131 (rt->rt6i_dev == NULL ||
1132 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1133 continue;
1134 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1135 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1136 continue;
1137 if (rtmsg->rtmsg_metric &&
1138 rtmsg->rtmsg_metric != rt->rt6i_metric)
1139 continue;
1140 dst_hold(&rt->u.dst);
1141 read_unlock_bh(&rt6_lock);
1142
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001143 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 }
1145 }
1146 read_unlock_bh(&rt6_lock);
1147
1148 return err;
1149}
1150
1151/*
1152 * Handle redirects
1153 */
1154void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1155 struct neighbour *neigh, u8 *lladdr, int on_link)
1156{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001157 struct rt6_info *rt, *nrt = NULL;
1158 int strict;
1159 struct fib6_node *fn;
Tom Tucker8d717402006-07-30 20:43:36 -07001160 struct netevent_redirect netevent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161
1162 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001163 * Get the "current" route for this destination and
1164 * check if the redirect has come from approriate router.
1165 *
1166 * RFC 2461 specifies that redirects should only be
1167 * accepted if they come from the nexthop to the target.
1168 * Due to the way the routes are chosen, this notion
1169 * is a bit fuzzy and one might need to check all possible
1170 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 */
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001172 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001174 read_lock_bh(&rt6_lock);
1175 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1176restart:
1177 for (rt = fn->leaf; rt; rt = rt->u.next) {
1178 /*
1179 * Current route is on-link; redirect is always invalid.
1180 *
1181 * Seems, previous statement is not true. It could
1182 * be node, which looks for us as on-link (f.e. proxy ndisc)
1183 * But then router serving it might decide, that we should
1184 * know truth 8)8) --ANK (980726).
1185 */
1186 if (rt6_check_expired(rt))
1187 continue;
1188 if (!(rt->rt6i_flags & RTF_GATEWAY))
1189 continue;
1190 if (neigh->dev != rt->rt6i_dev)
1191 continue;
1192 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1193 continue;
1194 break;
1195 }
1196 if (rt)
1197 dst_hold(&rt->u.dst);
1198 else if (strict) {
1199 while ((fn = fn->parent) != NULL) {
1200 if (fn->fn_flags & RTN_ROOT)
1201 break;
1202 if (fn->fn_flags & RTN_RTINFO)
1203 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001205 }
1206 read_unlock_bh(&rt6_lock);
1207
1208 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 if (net_ratelimit())
1210 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1211 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001212 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 }
1214
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 /*
1216 * We have finally decided to accept it.
1217 */
1218
1219 neigh_update(neigh, lladdr, NUD_STALE,
1220 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1221 NEIGH_UPDATE_F_OVERRIDE|
1222 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1223 NEIGH_UPDATE_F_ISROUTER))
1224 );
1225
1226 /*
1227 * Redirect received -> path was valid.
1228 * Look, redirects are sent only in response to data packets,
1229 * so that this nexthop apparently is reachable. --ANK
1230 */
1231 dst_confirm(&rt->u.dst);
1232
1233 /* Duplicate redirect: silently ignore. */
1234 if (neigh == rt->u.dst.neighbour)
1235 goto out;
1236
1237 nrt = ip6_rt_copy(rt);
1238 if (nrt == NULL)
1239 goto out;
1240
1241 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1242 if (on_link)
1243 nrt->rt6i_flags &= ~RTF_GATEWAY;
1244
1245 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1246 nrt->rt6i_dst.plen = 128;
1247 nrt->u.dst.flags |= DST_HOST;
1248
1249 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1250 nrt->rt6i_nexthop = neigh_clone(neigh);
1251 /* Reset pmtu, it may be better */
1252 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1253 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1254
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001255 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 goto out;
1257
Tom Tucker8d717402006-07-30 20:43:36 -07001258 netevent.old = &rt->u.dst;
1259 netevent.new = &nrt->u.dst;
1260 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1261
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001263 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 return;
1265 }
1266
1267out:
1268 dst_release(&rt->u.dst);
1269 return;
1270}
1271
1272/*
1273 * Handle ICMP "packet too big" messages
1274 * i.e. Path MTU discovery
1275 */
1276
1277void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1278 struct net_device *dev, u32 pmtu)
1279{
1280 struct rt6_info *rt, *nrt;
1281 int allfrag = 0;
1282
1283 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1284 if (rt == NULL)
1285 return;
1286
1287 if (pmtu >= dst_mtu(&rt->u.dst))
1288 goto out;
1289
1290 if (pmtu < IPV6_MIN_MTU) {
1291 /*
1292 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1293 * MTU (1280) and a fragment header should always be included
1294 * after a node receiving Too Big message reporting PMTU is
1295 * less than the IPv6 Minimum Link MTU.
1296 */
1297 pmtu = IPV6_MIN_MTU;
1298 allfrag = 1;
1299 }
1300
1301 /* New mtu received -> path was valid.
1302 They are sent only in response to data packets,
1303 so that this nexthop apparently is reachable. --ANK
1304 */
1305 dst_confirm(&rt->u.dst);
1306
1307 /* Host route. If it is static, it would be better
1308 not to override it, but add new one, so that
1309 when cache entry will expire old pmtu
1310 would return automatically.
1311 */
1312 if (rt->rt6i_flags & RTF_CACHE) {
1313 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1314 if (allfrag)
1315 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1316 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1317 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1318 goto out;
1319 }
1320
1321 /* Network route.
1322 Two cases are possible:
1323 1. It is connected route. Action: COW
1324 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1325 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001326 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001327 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001328 else
1329 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001330
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001331 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001332 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1333 if (allfrag)
1334 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1335
1336 /* According to RFC 1981, detecting PMTU increase shouldn't be
1337 * happened within 5 mins, the recommended timer is 10 mins.
1338 * Here this route expiration time is set to ip6_rt_mtu_expires
1339 * which is 10 mins. After 10 mins the decreased pmtu is expired
1340 * and detecting PMTU increase will be automatically happened.
1341 */
1342 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1343 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1344
1345 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347out:
1348 dst_release(&rt->u.dst);
1349}
1350
1351/*
1352 * Misc support functions
1353 */
1354
1355static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1356{
1357 struct rt6_info *rt = ip6_dst_alloc();
1358
1359 if (rt) {
1360 rt->u.dst.input = ort->u.dst.input;
1361 rt->u.dst.output = ort->u.dst.output;
1362
1363 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1364 rt->u.dst.dev = ort->u.dst.dev;
1365 if (rt->u.dst.dev)
1366 dev_hold(rt->u.dst.dev);
1367 rt->rt6i_idev = ort->rt6i_idev;
1368 if (rt->rt6i_idev)
1369 in6_dev_hold(rt->rt6i_idev);
1370 rt->u.dst.lastuse = jiffies;
1371 rt->rt6i_expires = 0;
1372
1373 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1374 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1375 rt->rt6i_metric = 0;
1376
1377 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1378#ifdef CONFIG_IPV6_SUBTREES
1379 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1380#endif
1381 }
1382 return rt;
1383}
1384
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001385#ifdef CONFIG_IPV6_ROUTE_INFO
1386static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1387 struct in6_addr *gwaddr, int ifindex)
1388{
1389 struct fib6_node *fn;
1390 struct rt6_info *rt = NULL;
1391
1392 write_lock_bh(&rt6_lock);
1393 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1394 if (!fn)
1395 goto out;
1396
1397 for (rt = fn->leaf; rt; rt = rt->u.next) {
1398 if (rt->rt6i_dev->ifindex != ifindex)
1399 continue;
1400 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1401 continue;
1402 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1403 continue;
1404 dst_hold(&rt->u.dst);
1405 break;
1406 }
1407out:
1408 write_unlock_bh(&rt6_lock);
1409 return rt;
1410}
1411
1412static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1413 struct in6_addr *gwaddr, int ifindex,
1414 unsigned pref)
1415{
1416 struct in6_rtmsg rtmsg;
1417
1418 memset(&rtmsg, 0, sizeof(rtmsg));
1419 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1420 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1421 rtmsg.rtmsg_dst_len = prefixlen;
1422 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1423 rtmsg.rtmsg_metric = 1024;
1424 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001425 /* We should treat it as a default route if prefix length is 0. */
1426 if (!prefixlen)
1427 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001428 rtmsg.rtmsg_ifindex = ifindex;
1429
1430 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1431
1432 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1433}
1434#endif
1435
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1437{
1438 struct rt6_info *rt;
1439 struct fib6_node *fn;
1440
1441 fn = &ip6_routing_table;
1442
1443 write_lock_bh(&rt6_lock);
1444 for (rt = fn->leaf; rt; rt=rt->u.next) {
1445 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001446 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1448 break;
1449 }
1450 if (rt)
1451 dst_hold(&rt->u.dst);
1452 write_unlock_bh(&rt6_lock);
1453 return rt;
1454}
1455
1456struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001457 struct net_device *dev,
1458 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459{
1460 struct in6_rtmsg rtmsg;
1461
1462 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1463 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1464 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1465 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001466 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1467 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
1469 rtmsg.rtmsg_ifindex = dev->ifindex;
1470
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001471 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 return rt6_get_dflt_router(gwaddr, dev);
1473}
1474
1475void rt6_purge_dflt_routers(void)
1476{
1477 struct rt6_info *rt;
1478
1479restart:
1480 read_lock_bh(&rt6_lock);
1481 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1482 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1483 dst_hold(&rt->u.dst);
1484
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 read_unlock_bh(&rt6_lock);
1486
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001487 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488
1489 goto restart;
1490 }
1491 }
1492 read_unlock_bh(&rt6_lock);
1493}
1494
1495int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1496{
1497 struct in6_rtmsg rtmsg;
1498 int err;
1499
1500 switch(cmd) {
1501 case SIOCADDRT: /* Add a route */
1502 case SIOCDELRT: /* Delete a route */
1503 if (!capable(CAP_NET_ADMIN))
1504 return -EPERM;
1505 err = copy_from_user(&rtmsg, arg,
1506 sizeof(struct in6_rtmsg));
1507 if (err)
1508 return -EFAULT;
1509
1510 rtnl_lock();
1511 switch (cmd) {
1512 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001513 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 break;
1515 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001516 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 break;
1518 default:
1519 err = -EINVAL;
1520 }
1521 rtnl_unlock();
1522
1523 return err;
1524 };
1525
1526 return -EINVAL;
1527}
1528
1529/*
1530 * Drop the packet on the floor
1531 */
1532
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001533static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534{
1535 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1536 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1537 kfree_skb(skb);
1538 return 0;
1539}
1540
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001541static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542{
1543 skb->dev = skb->dst->dev;
1544 return ip6_pkt_discard(skb);
1545}
1546
1547/*
1548 * Allocate a dst for local (unicast / anycast) address.
1549 */
1550
1551struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1552 const struct in6_addr *addr,
1553 int anycast)
1554{
1555 struct rt6_info *rt = ip6_dst_alloc();
1556
1557 if (rt == NULL)
1558 return ERR_PTR(-ENOMEM);
1559
1560 dev_hold(&loopback_dev);
1561 in6_dev_hold(idev);
1562
1563 rt->u.dst.flags = DST_HOST;
1564 rt->u.dst.input = ip6_input;
1565 rt->u.dst.output = ip6_output;
1566 rt->rt6i_dev = &loopback_dev;
1567 rt->rt6i_idev = idev;
1568 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1569 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1570 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1571 rt->u.dst.obsolete = -1;
1572
1573 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001574 if (anycast)
1575 rt->rt6i_flags |= RTF_ANYCAST;
1576 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 rt->rt6i_flags |= RTF_LOCAL;
1578 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1579 if (rt->rt6i_nexthop == NULL) {
1580 dst_free((struct dst_entry *) rt);
1581 return ERR_PTR(-ENOMEM);
1582 }
1583
1584 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1585 rt->rt6i_dst.plen = 128;
1586
1587 atomic_set(&rt->u.dst.__refcnt, 1);
1588
1589 return rt;
1590}
1591
1592static int fib6_ifdown(struct rt6_info *rt, void *arg)
1593{
1594 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1595 rt != &ip6_null_entry) {
1596 RT6_TRACE("deleted by ifdown %p\n", rt);
1597 return -1;
1598 }
1599 return 0;
1600}
1601
1602void rt6_ifdown(struct net_device *dev)
1603{
1604 write_lock_bh(&rt6_lock);
1605 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1606 write_unlock_bh(&rt6_lock);
1607}
1608
/* Argument handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device passed to rt6_mtu_change() */
	unsigned		mtu;	/* MTU value passed to rt6_mtu_change() */
};
1614
1615static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1616{
1617 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1618 struct inet6_dev *idev;
1619
1620 /* In IPv6 pmtu discovery is not optional,
1621 so that RTAX_MTU lock cannot disable it.
1622 We still use this lock to block changes
1623 caused by addrconf/ndisc.
1624 */
1625
1626 idev = __in6_dev_get(arg->dev);
1627 if (idev == NULL)
1628 return 0;
1629
1630 /* For administrative MTU increase, there is no way to discover
1631 IPv6 PMTU increase, so PMTU increase should be updated here.
1632 Since RFC 1981 doesn't include administrative MTU increase
1633 update PMTU increase is a MUST. (i.e. jumbo frame)
1634 */
1635 /*
1636 If new MTU is less than route PMTU, this new MTU will be the
1637 lowest MTU in the path, update the route PMTU to reflect PMTU
1638 decreases; if new MTU is greater than route PMTU, and the
1639 old MTU is the lowest MTU in the path, update the route PMTU
1640 to reflect the increase. In this case if the other nodes' MTU
1641 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1642 PMTU discouvery.
1643 */
1644 if (rt->rt6i_dev == arg->dev &&
1645 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1646 (dst_mtu(&rt->u.dst) > arg->mtu ||
1647 (dst_mtu(&rt->u.dst) < arg->mtu &&
1648 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1649 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1650 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1651 return 0;
1652}
1653
1654void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1655{
1656 struct rt6_mtu_change_arg arg;
1657
1658 arg.dev = dev;
1659 arg.mtu = mtu;
1660 read_lock_bh(&rt6_lock);
1661 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1662 read_unlock_bh(&rt6_lock);
1663}
1664
1665static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1666 struct in6_rtmsg *rtmsg)
1667{
1668 memset(rtmsg, 0, sizeof(*rtmsg));
1669
1670 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1671 rtmsg->rtmsg_src_len = r->rtm_src_len;
1672 rtmsg->rtmsg_flags = RTF_UP;
1673 if (r->rtm_type == RTN_UNREACHABLE)
1674 rtmsg->rtmsg_flags |= RTF_REJECT;
1675
1676 if (rta[RTA_GATEWAY-1]) {
1677 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1678 return -EINVAL;
1679 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1680 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1681 }
1682 if (rta[RTA_DST-1]) {
1683 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1684 return -EINVAL;
1685 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1686 }
1687 if (rta[RTA_SRC-1]) {
1688 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1689 return -EINVAL;
1690 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1691 }
1692 if (rta[RTA_OIF-1]) {
1693 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1694 return -EINVAL;
1695 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1696 }
1697 if (rta[RTA_PRIORITY-1]) {
1698 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1699 return -EINVAL;
1700 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1701 }
1702 return 0;
1703}
1704
1705int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1706{
1707 struct rtmsg *r = NLMSG_DATA(nlh);
1708 struct in6_rtmsg rtmsg;
1709
1710 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1711 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001712 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713}
1714
1715int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1716{
1717 struct rtmsg *r = NLMSG_DATA(nlh);
1718 struct in6_rtmsg rtmsg;
1719
1720 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1721 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001722 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723}
1724
/* Per-call state handed from inet6_dump_fib() to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* buffer the dump messages go into */
	struct netlink_callback	*cb;	/* netlink dump callback of the request */
};
1730
1731static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001732 struct in6_addr *dst, struct in6_addr *src,
1733 int iif, int type, u32 pid, u32 seq,
1734 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735{
1736 struct rtmsg *rtm;
1737 struct nlmsghdr *nlh;
1738 unsigned char *b = skb->tail;
1739 struct rta_cacheinfo ci;
1740
1741 if (prefix) { /* user wants prefix routes only */
1742 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1743 /* success since this is not a prefix route */
1744 return 1;
1745 }
1746 }
1747
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001748 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 rtm = NLMSG_DATA(nlh);
1750 rtm->rtm_family = AF_INET6;
1751 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1752 rtm->rtm_src_len = rt->rt6i_src.plen;
1753 rtm->rtm_tos = 0;
1754 rtm->rtm_table = RT_TABLE_MAIN;
1755 if (rt->rt6i_flags&RTF_REJECT)
1756 rtm->rtm_type = RTN_UNREACHABLE;
1757 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1758 rtm->rtm_type = RTN_LOCAL;
1759 else
1760 rtm->rtm_type = RTN_UNICAST;
1761 rtm->rtm_flags = 0;
1762 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1763 rtm->rtm_protocol = rt->rt6i_protocol;
1764 if (rt->rt6i_flags&RTF_DYNAMIC)
1765 rtm->rtm_protocol = RTPROT_REDIRECT;
1766 else if (rt->rt6i_flags & RTF_ADDRCONF)
1767 rtm->rtm_protocol = RTPROT_KERNEL;
1768 else if (rt->rt6i_flags&RTF_DEFAULT)
1769 rtm->rtm_protocol = RTPROT_RA;
1770
1771 if (rt->rt6i_flags&RTF_CACHE)
1772 rtm->rtm_flags |= RTM_F_CLONED;
1773
1774 if (dst) {
1775 RTA_PUT(skb, RTA_DST, 16, dst);
1776 rtm->rtm_dst_len = 128;
1777 } else if (rtm->rtm_dst_len)
1778 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1779#ifdef CONFIG_IPV6_SUBTREES
1780 if (src) {
1781 RTA_PUT(skb, RTA_SRC, 16, src);
1782 rtm->rtm_src_len = 128;
1783 } else if (rtm->rtm_src_len)
1784 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1785#endif
1786 if (iif)
1787 RTA_PUT(skb, RTA_IIF, 4, &iif);
1788 else if (dst) {
1789 struct in6_addr saddr_buf;
1790 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1791 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1792 }
1793 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1794 goto rtattr_failure;
1795 if (rt->u.dst.neighbour)
1796 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1797 if (rt->u.dst.dev)
1798 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1799 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1800 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1801 if (rt->rt6i_expires)
1802 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1803 else
1804 ci.rta_expires = 0;
1805 ci.rta_used = rt->u.dst.__use;
1806 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1807 ci.rta_error = rt->u.dst.error;
1808 ci.rta_id = 0;
1809 ci.rta_ts = 0;
1810 ci.rta_tsage = 0;
1811 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1812 nlh->nlmsg_len = skb->tail - b;
1813 return skb->len;
1814
1815nlmsg_failure:
1816rtattr_failure:
1817 skb_trim(skb, b - skb->data);
1818 return -1;
1819}
1820
1821static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1822{
1823 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1824 int prefix;
1825
1826 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1827 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1828 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1829 } else
1830 prefix = 0;
1831
1832 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1833 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001834 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835}
1836
1837static int fib6_dump_node(struct fib6_walker_t *w)
1838{
1839 int res;
1840 struct rt6_info *rt;
1841
1842 for (rt = w->leaf; rt; rt = rt->u.next) {
1843 res = rt6_dump_route(rt, w->args);
1844 if (res < 0) {
1845 /* Frame is full, suspend walking */
1846 w->leaf = rt;
1847 return 1;
1848 }
1849 BUG_TRAP(res!=0);
1850 }
1851 w->leaf = NULL;
1852 return 0;
1853}
1854
1855static void fib6_dump_end(struct netlink_callback *cb)
1856{
1857 struct fib6_walker_t *w = (void*)cb->args[0];
1858
1859 if (w) {
1860 cb->args[0] = 0;
1861 fib6_walker_unlink(w);
1862 kfree(w);
1863 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001864 cb->done = (void*)cb->args[1];
1865 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866}
1867
1868static int fib6_dump_done(struct netlink_callback *cb)
1869{
1870 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001871 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872}
1873
1874int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1875{
1876 struct rt6_rtnl_dump_arg arg;
1877 struct fib6_walker_t *w;
1878 int res;
1879
1880 arg.skb = skb;
1881 arg.cb = cb;
1882
1883 w = (void*)cb->args[0];
1884 if (w == NULL) {
1885 /* New dump:
1886 *
1887 * 1. hook callback destructor.
1888 */
1889 cb->args[1] = (long)cb->done;
1890 cb->done = fib6_dump_done;
1891
1892 /*
1893 * 2. allocate and initialize walker.
1894 */
Ingo Oeser0c600ed2006-03-20 23:01:32 -08001895 w = kzalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 if (w == NULL)
1897 return -ENOMEM;
1898 RT6_TRACE("dump<%p", w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 w->root = &ip6_routing_table;
1900 w->func = fib6_dump_node;
1901 w->args = &arg;
1902 cb->args[0] = (long)w;
1903 read_lock_bh(&rt6_lock);
1904 res = fib6_walk(w);
1905 read_unlock_bh(&rt6_lock);
1906 } else {
1907 w->args = &arg;
1908 read_lock_bh(&rt6_lock);
1909 res = fib6_walk_continue(w);
1910 read_unlock_bh(&rt6_lock);
1911 }
1912#if RT6_DEBUG >= 3
1913 if (res <= 0 && skb->len == 0)
1914 RT6_TRACE("%p>dump end\n", w);
1915#endif
1916 res = res < 0 ? res : skb->len;
1917 /* res < 0 is an error. (really, impossible)
1918 res == 0 means that dump is complete, but skb still can contain data.
1919 res > 0 dump is not complete, but frame is full.
1920 */
1921 /* Destroy walker, if dump of this table is complete. */
1922 if (res <= 0)
1923 fib6_dump_end(cb);
1924 return res;
1925}
1926
1927int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1928{
1929 struct rtattr **rta = arg;
1930 int iif = 0;
1931 int err = -ENOBUFS;
1932 struct sk_buff *skb;
1933 struct flowi fl;
1934 struct rt6_info *rt;
1935
1936 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1937 if (skb == NULL)
1938 goto out;
1939
1940 /* Reserve room for dummy headers, this skb can pass
1941 through good chunk of routing engine.
1942 */
1943 skb->mac.raw = skb->data;
1944 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1945
1946 memset(&fl, 0, sizeof(fl));
1947 if (rta[RTA_SRC-1])
1948 ipv6_addr_copy(&fl.fl6_src,
1949 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1950 if (rta[RTA_DST-1])
1951 ipv6_addr_copy(&fl.fl6_dst,
1952 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1953
1954 if (rta[RTA_IIF-1])
1955 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1956
1957 if (iif) {
1958 struct net_device *dev;
1959 dev = __dev_get_by_index(iif);
1960 if (!dev) {
1961 err = -ENODEV;
1962 goto out_free;
1963 }
1964 }
1965
1966 fl.oif = 0;
1967 if (rta[RTA_OIF-1])
1968 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1969
1970 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1971
1972 skb->dst = &rt->u.dst;
1973
1974 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1975 err = rt6_fill_node(skb, rt,
1976 &fl.fl6_dst, &fl.fl6_src,
1977 iif,
1978 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001979 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980 if (err < 0) {
1981 err = -EMSGSIZE;
1982 goto out_free;
1983 }
1984
1985 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1986 if (err > 0)
1987 err = 0;
1988out:
1989 return err;
1990out_free:
1991 kfree_skb(skb);
1992 goto out;
1993}
1994
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001995void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1996 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997{
1998 struct sk_buff *skb;
1999 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002000 u32 pid = current->pid;
2001 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002003 if (req)
2004 pid = req->pid;
2005 if (nlh)
2006 seq = nlh->nlmsg_seq;
2007
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 skb = alloc_skb(size, gfp_any());
2009 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07002010 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 return;
2012 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002013 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002015 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 return;
2017 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002018 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2019 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020}
2021
2022/*
2023 * /proc
2024 */
2025
2026#ifdef CONFIG_PROC_FS
2027
/*
 * Nominal size in bytes of one record emitted by rt6_info_route():
 * three 32-digit hex addresses, two 4-byte " %02x " prefix lengths, and
 * 40 + 5 + 1 bytes for the trailing numeric fields, device name and newline.
 */
#define RT6_INFO_LEN (3 * 32 + 2 * 4 + 40 + 5 + 1)

/* State carried across rt6_info_route() calls while filling a /proc read. */
struct rt6_proc_arg
{
	char *buffer;	/* destination buffer */
	int offset;	/* byte offset the reader asked for */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* whole records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
2038
2039static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2040{
2041 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2042 int i;
2043
2044 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2045 arg->skip++;
2046 return 0;
2047 }
2048
2049 if (arg->len >= arg->length)
2050 return 0;
2051
2052 for (i=0; i<16; i++) {
2053 sprintf(arg->buffer + arg->len, "%02x",
2054 rt->rt6i_dst.addr.s6_addr[i]);
2055 arg->len += 2;
2056 }
2057 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2058 rt->rt6i_dst.plen);
2059
2060#ifdef CONFIG_IPV6_SUBTREES
2061 for (i=0; i<16; i++) {
2062 sprintf(arg->buffer + arg->len, "%02x",
2063 rt->rt6i_src.addr.s6_addr[i]);
2064 arg->len += 2;
2065 }
2066 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2067 rt->rt6i_src.plen);
2068#else
2069 sprintf(arg->buffer + arg->len,
2070 "00000000000000000000000000000000 00 ");
2071 arg->len += 36;
2072#endif
2073
2074 if (rt->rt6i_nexthop) {
2075 for (i=0; i<16; i++) {
2076 sprintf(arg->buffer + arg->len, "%02x",
2077 rt->rt6i_nexthop->primary_key[i]);
2078 arg->len += 2;
2079 }
2080 } else {
2081 sprintf(arg->buffer + arg->len,
2082 "00000000000000000000000000000000");
2083 arg->len += 32;
2084 }
2085 arg->len += sprintf(arg->buffer + arg->len,
2086 " %08x %08x %08x %08x %8s\n",
2087 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2088 rt->u.dst.__use, rt->rt6i_flags,
2089 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2090 return 0;
2091}
2092
2093static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2094{
2095 struct rt6_proc_arg arg;
2096 arg.buffer = buffer;
2097 arg.offset = offset;
2098 arg.length = length;
2099 arg.skip = 0;
2100 arg.len = 0;
2101
2102 read_lock_bh(&rt6_lock);
2103 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2104 read_unlock_bh(&rt6_lock);
2105
2106 *start = buffer;
2107 if (offset)
2108 *start += offset % RT6_INFO_LEN;
2109
2110 arg.len -= offset % RT6_INFO_LEN;
2111
2112 if (arg.len > length)
2113 arg.len = length;
2114 if (arg.len < 0)
2115 arg.len = 0;
2116
2117 return arg.len;
2118}
2119
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2121{
2122 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2123 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2124 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2125 rt6_stats.fib_rt_cache,
2126 atomic_read(&ip6_dst_ops.entries),
2127 rt6_stats.fib_discarded_routes);
2128
2129 return 0;
2130}
2131
2132static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2133{
2134 return single_open(file, rt6_stats_seq_show, NULL);
2135}
2136
2137static struct file_operations rt6_stats_seq_fops = {
2138 .owner = THIS_MODULE,
2139 .open = rt6_stats_seq_open,
2140 .read = seq_read,
2141 .llseek = seq_lseek,
2142 .release = single_release,
2143};
2144#endif /* CONFIG_PROC_FS */
2145
2146#ifdef CONFIG_SYSCTL
2147
2148static int flush_delay;
2149
2150static
2151int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2152 void __user *buffer, size_t *lenp, loff_t *ppos)
2153{
2154 if (write) {
2155 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2156 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2157 return 0;
2158 } else
2159 return -EINVAL;
2160}
2161
2162ctl_table ipv6_route_table[] = {
2163 {
2164 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2165 .procname = "flush",
2166 .data = &flush_delay,
2167 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002168 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 .proc_handler = &ipv6_sysctl_rtcache_flush
2170 },
2171 {
2172 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2173 .procname = "gc_thresh",
2174 .data = &ip6_dst_ops.gc_thresh,
2175 .maxlen = sizeof(int),
2176 .mode = 0644,
2177 .proc_handler = &proc_dointvec,
2178 },
2179 {
2180 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2181 .procname = "max_size",
2182 .data = &ip6_rt_max_size,
2183 .maxlen = sizeof(int),
2184 .mode = 0644,
2185 .proc_handler = &proc_dointvec,
2186 },
2187 {
2188 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2189 .procname = "gc_min_interval",
2190 .data = &ip6_rt_gc_min_interval,
2191 .maxlen = sizeof(int),
2192 .mode = 0644,
2193 .proc_handler = &proc_dointvec_jiffies,
2194 .strategy = &sysctl_jiffies,
2195 },
2196 {
2197 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2198 .procname = "gc_timeout",
2199 .data = &ip6_rt_gc_timeout,
2200 .maxlen = sizeof(int),
2201 .mode = 0644,
2202 .proc_handler = &proc_dointvec_jiffies,
2203 .strategy = &sysctl_jiffies,
2204 },
2205 {
2206 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2207 .procname = "gc_interval",
2208 .data = &ip6_rt_gc_interval,
2209 .maxlen = sizeof(int),
2210 .mode = 0644,
2211 .proc_handler = &proc_dointvec_jiffies,
2212 .strategy = &sysctl_jiffies,
2213 },
2214 {
2215 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2216 .procname = "gc_elasticity",
2217 .data = &ip6_rt_gc_elasticity,
2218 .maxlen = sizeof(int),
2219 .mode = 0644,
2220 .proc_handler = &proc_dointvec_jiffies,
2221 .strategy = &sysctl_jiffies,
2222 },
2223 {
2224 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2225 .procname = "mtu_expires",
2226 .data = &ip6_rt_mtu_expires,
2227 .maxlen = sizeof(int),
2228 .mode = 0644,
2229 .proc_handler = &proc_dointvec_jiffies,
2230 .strategy = &sysctl_jiffies,
2231 },
2232 {
2233 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2234 .procname = "min_adv_mss",
2235 .data = &ip6_rt_min_advmss,
2236 .maxlen = sizeof(int),
2237 .mode = 0644,
2238 .proc_handler = &proc_dointvec_jiffies,
2239 .strategy = &sysctl_jiffies,
2240 },
2241 {
2242 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2243 .procname = "gc_min_interval_ms",
2244 .data = &ip6_rt_gc_min_interval,
2245 .maxlen = sizeof(int),
2246 .mode = 0644,
2247 .proc_handler = &proc_dointvec_ms_jiffies,
2248 .strategy = &sysctl_ms_jiffies,
2249 },
2250 { .ctl_name = 0 }
2251};
2252
2253#endif
2254
2255void __init ip6_route_init(void)
2256{
2257 struct proc_dir_entry *p;
2258
2259 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2260 sizeof(struct rt6_info),
2261 0, SLAB_HWCACHE_ALIGN,
2262 NULL, NULL);
2263 if (!ip6_dst_ops.kmem_cachep)
2264 panic("cannot create ip6_dst_cache");
2265
2266 fib6_init();
2267#ifdef CONFIG_PROC_FS
2268 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2269 if (p)
2270 p->owner = THIS_MODULE;
2271
2272 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2273#endif
2274#ifdef CONFIG_XFRM
2275 xfrm6_init();
2276#endif
2277}
2278
2279void ip6_route_cleanup(void)
2280{
2281#ifdef CONFIG_PROC_FS
2282 proc_net_remove("ipv6_route");
2283 proc_net_remove("rt6_stats");
2284#endif
2285#ifdef CONFIG_XFRM
2286 xfrm6_fini();
2287#endif
2288 rt6_ifdown(NULL);
2289 fib6_gc_cleanup();
2290 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2291}