blob: 5c00ca4fa52ce796ba0616627207e31d4bddcf1b [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070057#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070058#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that both expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy route lookups clone a route that already has
 * a nexthop (or is RTF_NONEXTHOP) via rt6_alloc_clone(); when zero they
 * return such a route as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
Thomas Graf9ce8ade2006-10-18 20:46:54 -070097static int ip6_pkt_prohibit(struct sk_buff *skb);
98static int ip6_pkt_prohibit_out(struct sk_buff *skb);
99static int ip6_pkt_blk_hole(struct sk_buff *skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static void ip6_link_failure(struct sk_buff *skb);
101static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
102
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800103#ifdef CONFIG_IPV6_ROUTE_INFO
104static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex,
106 unsigned pref);
107static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
108 struct in6_addr *gwaddr, int ifindex);
109#endif
110
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111static struct dst_ops ip6_dst_ops = {
112 .family = AF_INET6,
113 .protocol = __constant_htons(ETH_P_IPV6),
114 .gc = ip6_dst_gc,
115 .gc_thresh = 1024,
116 .check = ip6_dst_check,
117 .destroy = ip6_dst_destroy,
118 .ifdown = ip6_dst_ifdown,
119 .negative_advice = ip6_negative_advice,
120 .link_failure = ip6_link_failure,
121 .update_pmtu = ip6_rt_update_pmtu,
122 .entry_size = sizeof(struct rt6_info),
123};
124
125struct rt6_info ip6_null_entry = {
126 .u = {
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .dev = &loopback_dev,
131 .obsolete = -1,
132 .error = -ENETUNREACH,
133 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
136 .ops = &ip6_dst_ops,
137 .path = (struct dst_entry*)&ip6_null_entry,
138 }
139 },
140 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Statically allocated reject route carrying -EACCES; packets are handed
 * to the ip6_pkt_prohibit pair.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.dev		= &loopback_dev,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Statically allocated reject route carrying -EINVAL; both directions
 * are handled by ip6_pkt_blk_hole.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.dev		= &loopback_dev,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
188
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189/* allocate dst with ip6_dst_ops */
190static __inline__ struct rt6_info *ip6_dst_alloc(void)
191{
192 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193}
194
195static void ip6_dst_destroy(struct dst_entry *dst)
196{
197 struct rt6_info *rt = (struct rt6_info *)dst;
198 struct inet6_dev *idev = rt->rt6i_idev;
199
200 if (idev != NULL) {
201 rt->rt6i_idev = NULL;
202 in6_dev_put(idev);
203 }
204}
205
206static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207 int how)
208{
209 struct rt6_info *rt = (struct rt6_info *)dst;
210 struct inet6_dev *idev = rt->rt6i_idev;
211
212 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
213 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
214 if (loopback_idev != NULL) {
215 rt->rt6i_idev = loopback_idev;
216 in6_dev_put(idev);
217 }
218 }
219}
220
221static __inline__ int rt6_check_expired(const struct rt6_info *rt)
222{
223 return (rt->rt6i_flags & RTF_EXPIRES &&
224 time_after(jiffies, rt->rt6i_expires));
225}
226
Thomas Grafc71099a2006-08-04 23:20:06 -0700227static inline int rt6_need_strict(struct in6_addr *daddr)
228{
229 return (ipv6_addr_type(daddr) &
230 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
231}
232
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */
236
237static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
238 int oif,
239 int strict)
240{
241 struct rt6_info *local = NULL;
242 struct rt6_info *sprt;
243
244 if (oif) {
245 for (sprt = rt; sprt; sprt = sprt->u.next) {
246 struct net_device *dev = sprt->rt6i_dev;
247 if (dev->ifindex == oif)
248 return sprt;
249 if (dev->flags & IFF_LOOPBACK) {
250 if (sprt->rt6i_idev == NULL ||
251 sprt->rt6i_idev->dev->ifindex != oif) {
252 if (strict && oif)
253 continue;
254 if (local && (!oif ||
255 local->rt6i_idev->dev->ifindex == oif))
256 continue;
257 }
258 local = sprt;
259 }
260 }
261
262 if (local)
263 return local;
264
265 if (strict)
266 return &ip6_null_entry;
267 }
268 return rt;
269}
270
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the nexthop of @rt when that
 * neighbour is not in a NUD_VALID state and the per-interface
 * rtr_probe_interval has elapsed since neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing;
 * the read side of the lock does not exclude other readers doing the
 * same update concurrently.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	/* Re-check under the lock: the state may have changed meanwhile. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp first so that further callers are rate-limited. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
306
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800312 struct net_device *dev = rt->rt6i_dev;
313 if (!oif || dev->ifindex == oif)
314 return 2;
315 if ((dev->flags & IFF_LOOPBACK) &&
316 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
317 return 1;
318 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319}
320
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 struct neighbour *neigh = rt->rt6i_nexthop;
324 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700325 if (rt->rt6i_flags & RTF_NONEXTHOP ||
326 !(rt->rt6i_flags & RTF_GATEWAY))
327 m = 1;
328 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 read_lock_bh(&neigh->lock);
330 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700331 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800332 read_unlock_bh(&neigh->lock);
333 }
334 return m;
335}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337static int rt6_score_route(struct rt6_info *rt, int oif,
338 int strict)
339{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 int m, n;
341
342 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700343 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800345#ifdef CONFIG_IPV6_ROUTER_PREF
346 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
347#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700348 n = rt6_check_neigh(rt);
349 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800350 m |= 16;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700351 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352 return -1;
353 return m;
354}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
357 int strict)
358{
359 struct rt6_info *match = NULL, *last = NULL;
360 struct rt6_info *rt, *rt0 = *head;
361 u32 metric;
362 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800364 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
365 __FUNCTION__, head, head ? *head : NULL, oif);
366
367 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700368 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369 rt = rt->u.next) {
370 int m;
371
372 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 continue;
374
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800377 m = rt6_score_route(rt, oif, strict);
378 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800381 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800382 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800383 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800385 } else {
386 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 }
388 }
389
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800390 if (!match &&
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700391 (strict & RT6_LOOKUP_F_REACHABLE) &&
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800392 last && last != rt0) {
393 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700394 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700395 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800396 *head = rt0->u.next;
397 rt0->u.next = last->u.next;
398 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700399 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 }
401
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800402 RT6_TRACE("%s() => %p, score=%d\n",
403 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406}
407
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one route information option.
 *
 * @dev:    device the option was received on; its ifindex keys the route
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address passed on to the route lookup and creation
 *
 * After validation the matching route is looked up with
 * rt6_get_route_info().  A zero lifetime deletes an existing route.  A
 * non-zero lifetime creates the route when it is missing, or refreshes
 * the preference bits of an existing one.  A lifetime of 0xffffffff
 * clears RTF_EXPIRES; any other lifetime sets an expiry of
 * jiffies + HZ * lifetime, with the lifetime clamped beforehand so that
 * the multiplication cannot overflow.
 *
 * Returns 0 on success (including "nothing to do") and -EINVAL when the
 * option fails the sanity checks.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * NOTE(review): RFC 4191 appears to require length 3 for a prefix
	 * longer than 64 bits and length >= 2 for any non-empty prefix;
	 * the bounds below are one lower.  Left unchanged pending
	 * confirmation against the RFC and the caller's length handling.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * The lifetime arrives in network byte order, so the conversion
	 * to host order is ntohl().  (htonl() performs the same byte swap
	 * but names the wrong direction.)
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference obtained from the get/add helper. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
485
/*
 * BACKTRACK(saddr) - walk back up the FIB tree after a failed match.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and labels `out` and `restart`.  It acts only when rt is
 * &ip6_null_entry: it then climbs from fn towards the root, descending
 * into a parent's source subtree (looked up with @saddr) when that
 * subtree is not where we came from.  It jumps to `restart` at the first
 * node carrying RTN_RTINFO, or to `out` once a node flagged RTN_TL_ROOT
 * is reached.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(pn->subtree, NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700503
504static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
505 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506{
507 struct fib6_node *fn;
508 struct rt6_info *rt;
509
Thomas Grafc71099a2006-08-04 23:20:06 -0700510 read_lock_bh(&table->tb6_lock);
511 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
512restart:
513 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700514 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700515 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700516out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700517 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700518 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
520 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700521 rt->u.dst.__use++;
522
523 return rt;
524
525}
526
527struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
528 int oif, int strict)
529{
530 struct flowi fl = {
531 .oif = oif,
532 .nl_u = {
533 .ip6_u = {
534 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700535 },
536 },
537 };
538 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700539 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700540
Thomas Grafadaa70b2006-10-13 15:01:03 -0700541 if (saddr) {
542 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
543 flags |= RT6_LOOKUP_F_HAS_SADDR;
544 }
545
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
547 if (dst->error == 0)
548 return (struct rt6_info *) dst;
549
550 dst_release(dst);
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 return NULL;
553}
554
Thomas Grafc71099a2006-08-04 23:20:06 -0700555/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 It takes new route entry, the addition fails by any reason the
557 route is freed. In any case, if caller does not hold it, it may
558 be destroyed.
559 */
560
Thomas Graf86872cb2006-08-22 00:01:08 -0700561static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562{
563 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700564 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565
Thomas Grafc71099a2006-08-04 23:20:06 -0700566 table = rt->rt6i_table;
567 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700568 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571 return err;
572}
573
Thomas Graf40e22e82006-08-22 00:00:45 -0700574int ip6_ins_rt(struct rt6_info *rt)
575{
Thomas Graf86872cb2006-08-22 00:01:08 -0700576 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700577}
578
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800579static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
580 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 struct rt6_info *rt;
583
584 /*
585 * Clone the route.
586 */
587
588 rt = ip6_rt_copy(ort);
589
590 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900591 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
592 if (rt->rt6i_dst.plen != 128 &&
593 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
594 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900596 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900598 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 rt->rt6i_dst.plen = 128;
600 rt->rt6i_flags |= RTF_CACHE;
601 rt->u.dst.flags |= DST_HOST;
602
603#ifdef CONFIG_IPV6_SUBTREES
604 if (rt->rt6i_src.plen && saddr) {
605 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
606 rt->rt6i_src.plen = 128;
607 }
608#endif
609
610 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
611
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800612 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800614 return rt;
615}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800617static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
618{
619 struct rt6_info *rt = ip6_rt_copy(ort);
620 if (rt) {
621 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
622 rt->rt6i_dst.plen = 128;
623 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800624 rt->u.dst.flags |= DST_HOST;
625 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
626 }
627 return rt;
628}
629
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700630static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
631 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632{
633 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800634 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700635 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800637 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700638 int reachable = RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700640 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641
642relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700643 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800645restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700649 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700650 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800651 if (rt == &ip6_null_entry ||
652 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800653 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800655 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700656 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800657
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800658 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800659 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800660 else {
661#if CLONE_OFFLINK_ROUTE
662 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
663#else
664 goto out2;
665#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800667
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800668 dst_release(&rt->u.dst);
669 rt = nrt ? : &ip6_null_entry;
670
671 dst_hold(&rt->u.dst);
672 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700673 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800674 if (!err)
675 goto out2;
676 }
677
678 if (--attempts <= 0)
679 goto out2;
680
681 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700682 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800683 * released someone could insert this route. Relookup.
684 */
685 dst_release(&rt->u.dst);
686 goto relookup;
687
688out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800689 if (reachable) {
690 reachable = 0;
691 goto restart_2;
692 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800693 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695out2:
696 rt->u.dst.lastuse = jiffies;
697 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700698
699 return rt;
700}
701
702void ip6_route_input(struct sk_buff *skb)
703{
704 struct ipv6hdr *iph = skb->nh.ipv6h;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700705 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700706 struct flowi fl = {
707 .iif = skb->dev->ifindex,
708 .nl_u = {
709 .ip6_u = {
710 .daddr = iph->daddr,
711 .saddr = iph->saddr,
David S. Miller267935b2006-08-25 16:07:48 -0700712#ifdef CONFIG_IPV6_ROUTE_FWMARK
YOSHIFUJI Hideaki75bff8f2006-08-21 19:22:01 +0900713 .fwmark = skb->nfmark,
David S. Miller267935b2006-08-25 16:07:48 -0700714#endif
Thomas Grafc71099a2006-08-04 23:20:06 -0700715 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
716 },
717 },
718 .proto = iph->nexthdr,
719 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700720
721 if (rt6_need_strict(&iph->daddr))
722 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700723
724 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
725}
726
727static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
728 struct flowi *fl, int flags)
729{
730 struct fib6_node *fn;
731 struct rt6_info *rt, *nrt;
732 int strict = 0;
733 int attempts = 3;
734 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700735 int reachable = RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700736
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700737 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700738
739relookup:
740 read_lock_bh(&table->tb6_lock);
741
742restart_2:
743 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
744
745restart:
746 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700747 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700748 if (rt == &ip6_null_entry ||
749 rt->rt6i_flags & RTF_CACHE)
750 goto out;
751
752 dst_hold(&rt->u.dst);
753 read_unlock_bh(&table->tb6_lock);
754
755 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
756 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
757 else {
758#if CLONE_OFFLINK_ROUTE
759 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
760#else
761 goto out2;
762#endif
763 }
764
765 dst_release(&rt->u.dst);
766 rt = nrt ? : &ip6_null_entry;
767
768 dst_hold(&rt->u.dst);
769 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700770 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700771 if (!err)
772 goto out2;
773 }
774
775 if (--attempts <= 0)
776 goto out2;
777
778 /*
779 * Race condition! In the gap, when table->tb6_lock was
780 * released someone could insert this route. Relookup.
781 */
782 dst_release(&rt->u.dst);
783 goto relookup;
784
785out:
786 if (reachable) {
787 reachable = 0;
788 goto restart_2;
789 }
790 dst_hold(&rt->u.dst);
791 read_unlock_bh(&table->tb6_lock);
792out2:
793 rt->u.dst.lastuse = jiffies;
794 rt->u.dst.__use++;
795 return rt;
796}
797
798struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
799{
800 int flags = 0;
801
802 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700803 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700804
Thomas Grafadaa70b2006-10-13 15:01:03 -0700805 if (!ipv6_addr_any(&fl->fl6_src))
806 flags |= RT6_LOOKUP_F_HAS_SADDR;
807
Thomas Grafc71099a2006-08-04 23:20:06 -0700808 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809}
810
811
812/*
813 * Destination cache support functions
814 */
815
816static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
817{
818 struct rt6_info *rt;
819
820 rt = (struct rt6_info *) dst;
821
822 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
823 return dst;
824
825 return NULL;
826}
827
828static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
829{
830 struct rt6_info *rt = (struct rt6_info *) dst;
831
832 if (rt) {
833 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700834 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 else
836 dst_release(dst);
837 }
838 return NULL;
839}
840
841static void ip6_link_failure(struct sk_buff *skb)
842{
843 struct rt6_info *rt;
844
845 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
846
847 rt = (struct rt6_info *) skb->dst;
848 if (rt) {
849 if (rt->rt6i_flags&RTF_CACHE) {
850 dst_set_expires(&rt->u.dst, 0);
851 rt->rt6i_flags |= RTF_EXPIRES;
852 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
853 rt->rt6i_node->fn_sernum = -1;
854 }
855}
856
857static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
858{
859 struct rt6_info *rt6 = (struct rt6_info*)dst;
860
861 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
862 rt6->rt6i_flags |= RTF_MODIFIED;
863 if (mtu < IPV6_MIN_MTU) {
864 mtu = IPV6_MIN_MTU;
865 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
866 }
867 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700868 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 }
870}
871
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872static int ipv6_get_mtu(struct net_device *dev);
873
874static inline unsigned int ipv6_advmss(unsigned int mtu)
875{
876 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
877
878 if (mtu < ip6_rt_min_advmss)
879 mtu = ip6_rt_min_advmss;
880
881 /*
882 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
883 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
884 * IPV6_MAXPLEN is also valid and means: "any MSS,
885 * rely only on pmtu discovery"
886 */
887 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
888 mtu = IPV6_MAXPLEN;
889 return mtu;
890}
891
/*
 * Singly linked list (through dst->next) of entries created by
 * ndisc_dst_alloc() and reaped by ndisc_dst_gc(); ndisc_lock guards it.
 */
static DEFINE_SPINLOCK(ndisc_lock);
static struct dst_entry *ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
896 struct neighbour *neigh,
897 struct in6_addr *addr,
898 int (*output)(struct sk_buff *))
899{
900 struct rt6_info *rt;
901 struct inet6_dev *idev = in6_dev_get(dev);
902
903 if (unlikely(idev == NULL))
904 return NULL;
905
906 rt = ip6_dst_alloc();
907 if (unlikely(rt == NULL)) {
908 in6_dev_put(idev);
909 goto out;
910 }
911
912 dev_hold(dev);
913 if (neigh)
914 neigh_hold(neigh);
915 else
916 neigh = ndisc_get_neigh(dev, addr);
917
918 rt->rt6i_dev = dev;
919 rt->rt6i_idev = idev;
920 rt->rt6i_nexthop = neigh;
921 atomic_set(&rt->u.dst.__refcnt, 1);
922 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
923 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
924 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
925 rt->u.dst.output = output;
926
927#if 0 /* there's no chance to use these for ndisc */
928 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
929 ? DST_HOST
930 : 0;
931 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
932 rt->rt6i_dst.plen = 128;
933#endif
934
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700935 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 rt->u.dst.next = ndisc_dst_gc_list;
937 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700938 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939
940 fib6_force_start_gc();
941
942out:
943 return (struct dst_entry *)rt;
944}
945
946int ndisc_dst_gc(int *more)
947{
948 struct dst_entry *dst, *next, **pprev;
949 int freed;
950
951 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700952 freed = 0;
953
954 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 while ((dst = *pprev) != NULL) {
958 if (!atomic_read(&dst->__refcnt)) {
959 *pprev = dst->next;
960 dst_free(dst);
961 freed++;
962 } else {
963 pprev = &dst->next;
964 (*more)++;
965 }
966 }
967
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700968 spin_unlock_bh(&ndisc_lock);
969
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 return freed;
971}
972
973static int ip6_dst_gc(void)
974{
975 static unsigned expire = 30*HZ;
976 static unsigned long last_gc;
977 unsigned long now = jiffies;
978
979 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
980 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
981 goto out;
982
983 expire++;
984 fib6_run_gc(expire);
985 last_gc = now;
986 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
987 expire = ip6_rt_gc_timeout>>1;
988
989out:
990 expire -= expire>>ip6_rt_gc_elasticity;
991 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
992}
993
994/* Clean host part of a prefix. Not necessary in radix tree,
995 but results in cleaner routing tables.
996
997 Remove it only when all the things will work!
998 */
999
1000static int ipv6_get_mtu(struct net_device *dev)
1001{
1002 int mtu = IPV6_MIN_MTU;
1003 struct inet6_dev *idev;
1004
1005 idev = in6_dev_get(dev);
1006 if (idev) {
1007 mtu = idev->cnf.mtu6;
1008 in6_dev_put(idev);
1009 }
1010 return mtu;
1011}
1012
1013int ipv6_get_hoplimit(struct net_device *dev)
1014{
1015 int hoplimit = ipv6_devconf.hop_limit;
1016 struct inet6_dev *idev;
1017
1018 idev = in6_dev_get(dev);
1019 if (idev) {
1020 hoplimit = idev->cnf.hop_limit;
1021 in6_dev_put(idev);
1022 }
1023 return hoplimit;
1024}
1025
1026/*
1027 *
1028 */
1029
Thomas Graf86872cb2006-08-22 00:01:08 -07001030int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031{
1032 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 struct rt6_info *rt = NULL;
1034 struct net_device *dev = NULL;
1035 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001036 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 int addr_type;
1038
Thomas Graf86872cb2006-08-22 00:01:08 -07001039 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 return -EINVAL;
1041#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001042 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 return -EINVAL;
1044#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001045 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001047 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 if (!dev)
1049 goto out;
1050 idev = in6_dev_get(dev);
1051 if (!idev)
1052 goto out;
1053 }
1054
Thomas Graf86872cb2006-08-22 00:01:08 -07001055 if (cfg->fc_metric == 0)
1056 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057
Thomas Graf86872cb2006-08-22 00:01:08 -07001058 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001059 if (table == NULL) {
1060 err = -ENOBUFS;
1061 goto out;
1062 }
1063
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 rt = ip6_dst_alloc();
1065
1066 if (rt == NULL) {
1067 err = -ENOMEM;
1068 goto out;
1069 }
1070
1071 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001072 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073
Thomas Graf86872cb2006-08-22 00:01:08 -07001074 if (cfg->fc_protocol == RTPROT_UNSPEC)
1075 cfg->fc_protocol = RTPROT_BOOT;
1076 rt->rt6i_protocol = cfg->fc_protocol;
1077
1078 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
1080 if (addr_type & IPV6_ADDR_MULTICAST)
1081 rt->u.dst.input = ip6_mc_input;
1082 else
1083 rt->u.dst.input = ip6_forward;
1084
1085 rt->u.dst.output = ip6_output;
1086
Thomas Graf86872cb2006-08-22 00:01:08 -07001087 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1088 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 if (rt->rt6i_dst.plen == 128)
1090 rt->u.dst.flags = DST_HOST;
1091
1092#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001093 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1094 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095#endif
1096
Thomas Graf86872cb2006-08-22 00:01:08 -07001097 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
1099 /* We cannot add true routes via loopback here,
1100 they would result in kernel looping; promote them to reject routes
1101 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001102 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1104 /* hold loopback dev/idev if we haven't done so. */
1105 if (dev != &loopback_dev) {
1106 if (dev) {
1107 dev_put(dev);
1108 in6_dev_put(idev);
1109 }
1110 dev = &loopback_dev;
1111 dev_hold(dev);
1112 idev = in6_dev_get(dev);
1113 if (!idev) {
1114 err = -ENODEV;
1115 goto out;
1116 }
1117 }
1118 rt->u.dst.output = ip6_pkt_discard_out;
1119 rt->u.dst.input = ip6_pkt_discard;
1120 rt->u.dst.error = -ENETUNREACH;
1121 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1122 goto install_route;
1123 }
1124
Thomas Graf86872cb2006-08-22 00:01:08 -07001125 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 struct in6_addr *gw_addr;
1127 int gwa_type;
1128
Thomas Graf86872cb2006-08-22 00:01:08 -07001129 gw_addr = &cfg->fc_gateway;
1130 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 gwa_type = ipv6_addr_type(gw_addr);
1132
1133 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1134 struct rt6_info *grt;
1135
1136 /* IPv6 strictly inhibits using not link-local
1137 addresses as nexthop address.
1138 Otherwise, router will not able to send redirects.
1139 It is very good, but in some (rare!) circumstances
1140 (SIT, PtP, NBMA NOARP links) it is handy to allow
1141 some exceptions. --ANK
1142 */
1143 err = -EINVAL;
1144 if (!(gwa_type&IPV6_ADDR_UNICAST))
1145 goto out;
1146
Thomas Graf86872cb2006-08-22 00:01:08 -07001147 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148
1149 err = -EHOSTUNREACH;
1150 if (grt == NULL)
1151 goto out;
1152 if (dev) {
1153 if (dev != grt->rt6i_dev) {
1154 dst_release(&grt->u.dst);
1155 goto out;
1156 }
1157 } else {
1158 dev = grt->rt6i_dev;
1159 idev = grt->rt6i_idev;
1160 dev_hold(dev);
1161 in6_dev_hold(grt->rt6i_idev);
1162 }
1163 if (!(grt->rt6i_flags&RTF_GATEWAY))
1164 err = 0;
1165 dst_release(&grt->u.dst);
1166
1167 if (err)
1168 goto out;
1169 }
1170 err = -EINVAL;
1171 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1172 goto out;
1173 }
1174
1175 err = -ENODEV;
1176 if (dev == NULL)
1177 goto out;
1178
Thomas Graf86872cb2006-08-22 00:01:08 -07001179 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1181 if (IS_ERR(rt->rt6i_nexthop)) {
1182 err = PTR_ERR(rt->rt6i_nexthop);
1183 rt->rt6i_nexthop = NULL;
1184 goto out;
1185 }
1186 }
1187
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
1190install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001191 if (cfg->fc_mx) {
1192 struct nlattr *nla;
1193 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194
Thomas Graf86872cb2006-08-22 00:01:08 -07001195 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1196 int type = nla->nla_type;
1197
1198 if (type) {
1199 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 err = -EINVAL;
1201 goto out;
1202 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001203
1204 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 }
1207 }
1208
1209 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1210 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1211 if (!rt->u.dst.metrics[RTAX_MTU-1])
1212 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1213 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1214 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1215 rt->u.dst.dev = dev;
1216 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001217 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001218 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219
1220out:
1221 if (dev)
1222 dev_put(dev);
1223 if (idev)
1224 in6_dev_put(idev);
1225 if (rt)
1226 dst_free((struct dst_entry *) rt);
1227 return err;
1228}
1229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231{
1232 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001233 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Patrick McHardy6c813a72006-08-06 22:22:47 -07001235 if (rt == &ip6_null_entry)
1236 return -ENOENT;
1237
Thomas Grafc71099a2006-08-04 23:20:06 -07001238 table = rt->rt6i_table;
1239 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
Thomas Graf86872cb2006-08-22 00:01:08 -07001241 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 dst_release(&rt->u.dst);
1243
Thomas Grafc71099a2006-08-04 23:20:06 -07001244 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
1246 return err;
1247}
1248
/*
 *	Delete @rt from its table without netlink request info.
 *	Calls __ip6_del_rt(), which also drops one reference on @rt.
 */
int ip6_del_rt(struct rt6_info *rt)
{
	return __ip6_del_rt(rt, NULL);
}
1253
Thomas Graf86872cb2006-08-22 00:01:08 -07001254static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255{
Thomas Grafc71099a2006-08-04 23:20:06 -07001256 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 struct fib6_node *fn;
1258 struct rt6_info *rt;
1259 int err = -ESRCH;
1260
Thomas Graf86872cb2006-08-22 00:01:08 -07001261 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001262 if (table == NULL)
1263 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264
Thomas Grafc71099a2006-08-04 23:20:06 -07001265 read_lock_bh(&table->tb6_lock);
1266
1267 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001268 &cfg->fc_dst, cfg->fc_dst_len,
1269 &cfg->fc_src, cfg->fc_src_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
1271 if (fn) {
1272 for (rt = fn->leaf; rt; rt = rt->u.next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001273 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001275 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001277 if (cfg->fc_flags & RTF_GATEWAY &&
1278 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 continue;
1282 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001283 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
Thomas Graf86872cb2006-08-22 00:01:08 -07001285 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 }
1287 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001288 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
1290 return err;
1291}
1292
1293/*
1294 * Handle redirects
1295 */
/*
 * Lookup key for redirect validation: an ordinary flow plus the gateway a
 * candidate route must use.  ip6_route_redirect() passes it to the rule
 * lookup as a struct flowi * and __ip6_route_redirect() casts it back, so
 * 'fl' has to remain the first member.
 */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;	/* compared against rt6i_gateway */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001301static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1302 struct flowi *fl,
1303 int flags)
1304{
1305 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1306 struct rt6_info *rt;
1307 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001308
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001310 * Get the "current" route for this destination and
1311 * check if the redirect has come from approriate router.
1312 *
1313 * RFC 2461 specifies that redirects should only be
1314 * accepted if they come from the nexthop to the target.
1315 * Due to the way the routes are chosen, this notion
1316 * is a bit fuzzy and one might need to check all possible
1317 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
Thomas Grafc71099a2006-08-04 23:20:06 -07001320 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001321 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001322restart:
1323 for (rt = fn->leaf; rt; rt = rt->u.next) {
1324 /*
1325 * Current route is on-link; redirect is always invalid.
1326 *
1327 * Seems, previous statement is not true. It could
1328 * be node, which looks for us as on-link (f.e. proxy ndisc)
1329 * But then router serving it might decide, that we should
1330 * know truth 8)8) --ANK (980726).
1331 */
1332 if (rt6_check_expired(rt))
1333 continue;
1334 if (!(rt->rt6i_flags & RTF_GATEWAY))
1335 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001336 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001337 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001338 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001339 continue;
1340 break;
1341 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001342
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001343 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001344 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001345 BACKTRACK(&fl->fl6_src);
1346out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001347 dst_hold(&rt->u.dst);
1348
1349 read_unlock_bh(&table->tb6_lock);
1350
1351 return rt;
1352};
1353
1354static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1355 struct in6_addr *src,
1356 struct in6_addr *gateway,
1357 struct net_device *dev)
1358{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001359 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001360 struct ip6rd_flowi rdfl = {
1361 .fl = {
1362 .oif = dev->ifindex,
1363 .nl_u = {
1364 .ip6_u = {
1365 .daddr = *dest,
1366 .saddr = *src,
1367 },
1368 },
1369 },
1370 .gateway = *gateway,
1371 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001372
1373 if (rt6_need_strict(dest))
1374 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001375
1376 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1377}
1378
/*
 *	Process a received redirect for destination @dest.
 *
 *	@src and @saddr are handed to ip6_route_redirect() as the flow source
 *	and as the gateway the current route must use; @neigh is the
 *	neighbour entry of the new next hop, @lladdr its link-layer address
 *	and @on_link tells whether the target is on-link (in which case the
 *	new route is not a gateway route).
 *
 *	If no suitable current route exists the redirect is logged
 *	(rate-limited) and ignored.  Otherwise the neighbour entry is
 *	updated, the current route is confirmed and, unless @neigh already is
 *	its neighbour, a /128 RTF_CACHE|RTF_DYNAMIC copy pointing at @neigh is
 *	inserted.  After a successful insert NETEVENT_REDIRECT notifiers are
 *	called and, if the old route was itself a cache entry, it is deleted.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;

	/* Returns with a reference held, even for ip6_null_entry. */
	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == &ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* The new entry is a host route for exactly @dest. */
	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->u.dst;
	netevent.new = &nrt->u.dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/*
		 * ip6_del_rt() ends in __ip6_del_rt(), which drops our
		 * reference on rt itself, so skip the dst_release() at 'out'.
		 */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->u.dst);
	return;
}
1451
1452/*
1453 * Handle ICMP "packet too big" messages
1454 * i.e. Path MTU discovery
1455 */
1456
1457void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1458 struct net_device *dev, u32 pmtu)
1459{
1460 struct rt6_info *rt, *nrt;
1461 int allfrag = 0;
1462
1463 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1464 if (rt == NULL)
1465 return;
1466
1467 if (pmtu >= dst_mtu(&rt->u.dst))
1468 goto out;
1469
1470 if (pmtu < IPV6_MIN_MTU) {
1471 /*
1472 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1473 * MTU (1280) and a fragment header should always be included
1474 * after a node receiving Too Big message reporting PMTU is
1475 * less than the IPv6 Minimum Link MTU.
1476 */
1477 pmtu = IPV6_MIN_MTU;
1478 allfrag = 1;
1479 }
1480
1481 /* New mtu received -> path was valid.
1482 They are sent only in response to data packets,
1483 so that this nexthop apparently is reachable. --ANK
1484 */
1485 dst_confirm(&rt->u.dst);
1486
1487 /* Host route. If it is static, it would be better
1488 not to override it, but add new one, so that
1489 when cache entry will expire old pmtu
1490 would return automatically.
1491 */
1492 if (rt->rt6i_flags & RTF_CACHE) {
1493 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1494 if (allfrag)
1495 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1496 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1497 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1498 goto out;
1499 }
1500
1501 /* Network route.
1502 Two cases are possible:
1503 1. It is connected route. Action: COW
1504 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1505 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001506 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001507 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001508 else
1509 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001510
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001511 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001512 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1513 if (allfrag)
1514 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1515
1516 /* According to RFC 1981, detecting PMTU increase shouldn't be
1517 * happened within 5 mins, the recommended timer is 10 mins.
1518 * Here this route expiration time is set to ip6_rt_mtu_expires
1519 * which is 10 mins. After 10 mins the decreased pmtu is expired
1520 * and detecting PMTU increase will be automatically happened.
1521 */
1522 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1523 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1524
Thomas Graf40e22e82006-08-22 00:00:45 -07001525 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527out:
1528 dst_release(&rt->u.dst);
1529}
1530
1531/*
1532 * Misc support functions
1533 */
1534
1535static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1536{
1537 struct rt6_info *rt = ip6_dst_alloc();
1538
1539 if (rt) {
1540 rt->u.dst.input = ort->u.dst.input;
1541 rt->u.dst.output = ort->u.dst.output;
1542
1543 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001544 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 rt->u.dst.dev = ort->u.dst.dev;
1546 if (rt->u.dst.dev)
1547 dev_hold(rt->u.dst.dev);
1548 rt->rt6i_idev = ort->rt6i_idev;
1549 if (rt->rt6i_idev)
1550 in6_dev_hold(rt->rt6i_idev);
1551 rt->u.dst.lastuse = jiffies;
1552 rt->rt6i_expires = 0;
1553
1554 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1555 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1556 rt->rt6i_metric = 0;
1557
1558 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1559#ifdef CONFIG_IPV6_SUBTREES
1560 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1561#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001562 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 }
1564 return rt;
1565}
1566
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001567#ifdef CONFIG_IPV6_ROUTE_INFO
1568static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1569 struct in6_addr *gwaddr, int ifindex)
1570{
1571 struct fib6_node *fn;
1572 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001573 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001574
Thomas Grafc71099a2006-08-04 23:20:06 -07001575 table = fib6_get_table(RT6_TABLE_INFO);
1576 if (table == NULL)
1577 return NULL;
1578
1579 write_lock_bh(&table->tb6_lock);
1580 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001581 if (!fn)
1582 goto out;
1583
1584 for (rt = fn->leaf; rt; rt = rt->u.next) {
1585 if (rt->rt6i_dev->ifindex != ifindex)
1586 continue;
1587 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1588 continue;
1589 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1590 continue;
1591 dst_hold(&rt->u.dst);
1592 break;
1593 }
1594out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001595 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001596 return rt;
1597}
1598
1599static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1600 struct in6_addr *gwaddr, int ifindex,
1601 unsigned pref)
1602{
Thomas Graf86872cb2006-08-22 00:01:08 -07001603 struct fib6_config cfg = {
1604 .fc_table = RT6_TABLE_INFO,
1605 .fc_metric = 1024,
1606 .fc_ifindex = ifindex,
1607 .fc_dst_len = prefixlen,
1608 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1609 RTF_UP | RTF_PREF(pref),
1610 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001611
Thomas Graf86872cb2006-08-22 00:01:08 -07001612 ipv6_addr_copy(&cfg.fc_dst, prefix);
1613 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1614
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001615 /* We should treat it as a default route if prefix length is 0. */
1616 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001617 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001618
Thomas Graf86872cb2006-08-22 00:01:08 -07001619 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001620
1621 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1622}
1623#endif
1624
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1626{
1627 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001628 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629
Thomas Grafc71099a2006-08-04 23:20:06 -07001630 table = fib6_get_table(RT6_TABLE_DFLT);
1631 if (table == NULL)
1632 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633
Thomas Grafc71099a2006-08-04 23:20:06 -07001634 write_lock_bh(&table->tb6_lock);
1635 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001637 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1639 break;
1640 }
1641 if (rt)
1642 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001643 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 return rt;
1645}
1646
1647struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001648 struct net_device *dev,
1649 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650{
Thomas Graf86872cb2006-08-22 00:01:08 -07001651 struct fib6_config cfg = {
1652 .fc_table = RT6_TABLE_DFLT,
1653 .fc_metric = 1024,
1654 .fc_ifindex = dev->ifindex,
1655 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1656 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1657 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Thomas Graf86872cb2006-08-22 00:01:08 -07001659 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660
Thomas Graf86872cb2006-08-22 00:01:08 -07001661 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 return rt6_get_dflt_router(gwaddr, dev);
1664}
1665
1666void rt6_purge_dflt_routers(void)
1667{
1668 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001669 struct fib6_table *table;
1670
1671 /* NOTE: Keep consistent with rt6_get_dflt_router */
1672 table = fib6_get_table(RT6_TABLE_DFLT);
1673 if (table == NULL)
1674 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675
1676restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001677 read_lock_bh(&table->tb6_lock);
1678 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1680 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001681 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001682 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 goto restart;
1684 }
1685 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001686 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687}
1688
Thomas Graf86872cb2006-08-22 00:01:08 -07001689static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1690 struct fib6_config *cfg)
1691{
1692 memset(cfg, 0, sizeof(*cfg));
1693
1694 cfg->fc_table = RT6_TABLE_MAIN;
1695 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1696 cfg->fc_metric = rtmsg->rtmsg_metric;
1697 cfg->fc_expires = rtmsg->rtmsg_info;
1698 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1699 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1700 cfg->fc_flags = rtmsg->rtmsg_flags;
1701
1702 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1703 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1704 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1705}
1706
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1708{
Thomas Graf86872cb2006-08-22 00:01:08 -07001709 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 struct in6_rtmsg rtmsg;
1711 int err;
1712
1713 switch(cmd) {
1714 case SIOCADDRT: /* Add a route */
1715 case SIOCDELRT: /* Delete a route */
1716 if (!capable(CAP_NET_ADMIN))
1717 return -EPERM;
1718 err = copy_from_user(&rtmsg, arg,
1719 sizeof(struct in6_rtmsg));
1720 if (err)
1721 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001722
1723 rtmsg_to_fib6_config(&rtmsg, &cfg);
1724
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 rtnl_lock();
1726 switch (cmd) {
1727 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001728 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 break;
1730 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001731 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 break;
1733 default:
1734 err = -EINVAL;
1735 }
1736 rtnl_unlock();
1737
1738 return err;
1739 };
1740
1741 return -EINVAL;
1742}
1743
1744/*
1745 * Drop the packet on the floor
1746 */
1747
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001748static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001750 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1751 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1752 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1753
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001755 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 kfree_skb(skb);
1757 return 0;
1758}
1759
/* Input handler for reject routes: drop with ICMPv6 code ICMPV6_NOROUTE. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
}
1764
/*
 * Output-side variant of ip6_pkt_discard(): skb->dev is first pointed at the
 * device of the skb's dst, because ip6_pkt_drop() passes skb->dev on to
 * icmpv6_send().
 */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}
1770
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001771static int ip6_pkt_prohibit(struct sk_buff *skb)
1772{
1773 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
1774}
1775
1776static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1777{
1778 skb->dev = skb->dst->dev;
1779 return ip6_pkt_prohibit(skb);
1780}
1781
/*
 * Blackhole handler: free @skb without touching any counter and
 * without sending an ICMPv6 error.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1787
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788/*
1789 * Allocate a dst for local (unicast / anycast) address.
1790 */
1791
1792struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1793 const struct in6_addr *addr,
1794 int anycast)
1795{
1796 struct rt6_info *rt = ip6_dst_alloc();
1797
1798 if (rt == NULL)
1799 return ERR_PTR(-ENOMEM);
1800
1801 dev_hold(&loopback_dev);
1802 in6_dev_hold(idev);
1803
1804 rt->u.dst.flags = DST_HOST;
1805 rt->u.dst.input = ip6_input;
1806 rt->u.dst.output = ip6_output;
1807 rt->rt6i_dev = &loopback_dev;
1808 rt->rt6i_idev = idev;
1809 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1810 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1811 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1812 rt->u.dst.obsolete = -1;
1813
1814 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001815 if (anycast)
1816 rt->rt6i_flags |= RTF_ANYCAST;
1817 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818 rt->rt6i_flags |= RTF_LOCAL;
1819 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1820 if (rt->rt6i_nexthop == NULL) {
1821 dst_free((struct dst_entry *) rt);
1822 return ERR_PTR(-ENOMEM);
1823 }
1824
1825 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1826 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001827 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828
1829 atomic_set(&rt->u.dst.__refcnt, 1);
1830
1831 return rt;
1832}
1833
1834static int fib6_ifdown(struct rt6_info *rt, void *arg)
1835{
1836 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1837 rt != &ip6_null_entry) {
1838 RT6_TRACE("deleted by ifdown %p\n", rt);
1839 return -1;
1840 }
1841 return 0;
1842}
1843
1844void rt6_ifdown(struct net_device *dev)
1845{
Thomas Grafc71099a2006-08-04 23:20:06 -07001846 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847}
1848
/* Argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU of that device */
};
1854
1855static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1856{
1857 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1858 struct inet6_dev *idev;
1859
1860 /* In IPv6 pmtu discovery is not optional,
1861 so that RTAX_MTU lock cannot disable it.
1862 We still use this lock to block changes
1863 caused by addrconf/ndisc.
1864 */
1865
1866 idev = __in6_dev_get(arg->dev);
1867 if (idev == NULL)
1868 return 0;
1869
1870 /* For administrative MTU increase, there is no way to discover
1871 IPv6 PMTU increase, so PMTU increase should be updated here.
1872 Since RFC 1981 doesn't include administrative MTU increase
1873 update PMTU increase is a MUST. (i.e. jumbo frame)
1874 */
1875 /*
1876 If new MTU is less than route PMTU, this new MTU will be the
1877 lowest MTU in the path, update the route PMTU to reflect PMTU
1878 decreases; if new MTU is greater than route PMTU, and the
1879 old MTU is the lowest MTU in the path, update the route PMTU
1880 to reflect the increase. In this case if the other nodes' MTU
1881 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1882 PMTU discouvery.
1883 */
1884 if (rt->rt6i_dev == arg->dev &&
1885 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1886 (dst_mtu(&rt->u.dst) > arg->mtu ||
1887 (dst_mtu(&rt->u.dst) < arg->mtu &&
1888 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1889 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1890 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1891 return 0;
1892}
1893
1894void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1895{
Thomas Grafc71099a2006-08-04 23:20:06 -07001896 struct rt6_mtu_change_arg arg = {
1897 .dev = dev,
1898 .mtu = mtu,
1899 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900
Thomas Grafc71099a2006-08-04 23:20:06 -07001901 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902}
1903
Thomas Graf86872cb2006-08-22 00:01:08 -07001904static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
Thomas Graf5176f912006-08-26 20:13:18 -07001905 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001906 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001907 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001908 [RTA_PRIORITY] = { .type = NLA_U32 },
1909 [RTA_METRICS] = { .type = NLA_NESTED },
1910};
1911
1912static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1913 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914{
Thomas Graf86872cb2006-08-22 00:01:08 -07001915 struct rtmsg *rtm;
1916 struct nlattr *tb[RTA_MAX+1];
1917 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918
Thomas Graf86872cb2006-08-22 00:01:08 -07001919 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1920 if (err < 0)
1921 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922
Thomas Graf86872cb2006-08-22 00:01:08 -07001923 err = -EINVAL;
1924 rtm = nlmsg_data(nlh);
1925 memset(cfg, 0, sizeof(*cfg));
1926
1927 cfg->fc_table = rtm->rtm_table;
1928 cfg->fc_dst_len = rtm->rtm_dst_len;
1929 cfg->fc_src_len = rtm->rtm_src_len;
1930 cfg->fc_flags = RTF_UP;
1931 cfg->fc_protocol = rtm->rtm_protocol;
1932
1933 if (rtm->rtm_type == RTN_UNREACHABLE)
1934 cfg->fc_flags |= RTF_REJECT;
1935
1936 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1937 cfg->fc_nlinfo.nlh = nlh;
1938
1939 if (tb[RTA_GATEWAY]) {
1940 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1941 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001943
1944 if (tb[RTA_DST]) {
1945 int plen = (rtm->rtm_dst_len + 7) >> 3;
1946
1947 if (nla_len(tb[RTA_DST]) < plen)
1948 goto errout;
1949
1950 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001952
1953 if (tb[RTA_SRC]) {
1954 int plen = (rtm->rtm_src_len + 7) >> 3;
1955
1956 if (nla_len(tb[RTA_SRC]) < plen)
1957 goto errout;
1958
1959 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001961
1962 if (tb[RTA_OIF])
1963 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1964
1965 if (tb[RTA_PRIORITY])
1966 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1967
1968 if (tb[RTA_METRICS]) {
1969 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1970 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001972
1973 if (tb[RTA_TABLE])
1974 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1975
1976 err = 0;
1977errout:
1978 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979}
1980
1981int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1982{
Thomas Graf86872cb2006-08-22 00:01:08 -07001983 struct fib6_config cfg;
1984 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985
Thomas Graf86872cb2006-08-22 00:01:08 -07001986 err = rtm_to_fib6_config(skb, nlh, &cfg);
1987 if (err < 0)
1988 return err;
1989
1990 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991}
1992
1993int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1994{
Thomas Graf86872cb2006-08-22 00:01:08 -07001995 struct fib6_config cfg;
1996 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
Thomas Graf86872cb2006-08-22 00:01:08 -07001998 err = rtm_to_fib6_config(skb, nlh, &cfg);
1999 if (err < 0)
2000 return err;
2001
2002 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003}
2004
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002006 struct in6_addr *dst, struct in6_addr *src,
2007 int iif, int type, u32 pid, u32 seq,
2008 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009{
2010 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002011 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 struct rta_cacheinfo ci;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002013 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
2015 if (prefix) { /* user wants prefix routes only */
2016 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2017 /* success since this is not a prefix route */
2018 return 1;
2019 }
2020 }
2021
Thomas Graf2d7202b2006-08-22 00:01:27 -07002022 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2023 if (nlh == NULL)
2024 return -ENOBUFS;
2025
2026 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 rtm->rtm_family = AF_INET6;
2028 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2029 rtm->rtm_src_len = rt->rt6i_src.plen;
2030 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002031 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002032 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002033 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002034 table = RT6_TABLE_UNSPEC;
2035 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002036 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 if (rt->rt6i_flags&RTF_REJECT)
2038 rtm->rtm_type = RTN_UNREACHABLE;
2039 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2040 rtm->rtm_type = RTN_LOCAL;
2041 else
2042 rtm->rtm_type = RTN_UNICAST;
2043 rtm->rtm_flags = 0;
2044 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2045 rtm->rtm_protocol = rt->rt6i_protocol;
2046 if (rt->rt6i_flags&RTF_DYNAMIC)
2047 rtm->rtm_protocol = RTPROT_REDIRECT;
2048 else if (rt->rt6i_flags & RTF_ADDRCONF)
2049 rtm->rtm_protocol = RTPROT_KERNEL;
2050 else if (rt->rt6i_flags&RTF_DEFAULT)
2051 rtm->rtm_protocol = RTPROT_RA;
2052
2053 if (rt->rt6i_flags&RTF_CACHE)
2054 rtm->rtm_flags |= RTM_F_CLONED;
2055
2056 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002057 NLA_PUT(skb, RTA_DST, 16, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 rtm->rtm_dst_len = 128;
2059 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002060 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061#ifdef CONFIG_IPV6_SUBTREES
2062 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002063 NLA_PUT(skb, RTA_SRC, 16, src);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 rtm->rtm_src_len = 128;
2065 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002066 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067#endif
2068 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002069 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 else if (dst) {
2071 struct in6_addr saddr_buf;
2072 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002073 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002075
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002077 goto nla_put_failure;
2078
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002080 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2081
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002083 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2084
2085 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2087 if (rt->rt6i_expires)
2088 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2089 else
2090 ci.rta_expires = 0;
2091 ci.rta_used = rt->u.dst.__use;
2092 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2093 ci.rta_error = rt->u.dst.error;
2094 ci.rta_id = 0;
2095 ci.rta_ts = 0;
2096 ci.rta_tsage = 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002097 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098
Thomas Graf2d7202b2006-08-22 00:01:27 -07002099 return nlmsg_end(skb, nlh);
2100
2101nla_put_failure:
2102 return nlmsg_cancel(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103}
2104
Patrick McHardy1b43af52006-08-10 23:11:17 -07002105int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106{
2107 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2108 int prefix;
2109
Thomas Graf2d7202b2006-08-22 00:01:27 -07002110 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2111 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2113 } else
2114 prefix = 0;
2115
2116 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2117 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002118 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119}
2120
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2122{
Thomas Grafab364a62006-08-22 00:01:47 -07002123 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002125 struct sk_buff *skb;
2126 struct rtmsg *rtm;
2127 struct flowi fl;
2128 int err, iif = 0;
2129
2130 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2131 if (err < 0)
2132 goto errout;
2133
2134 err = -EINVAL;
2135 memset(&fl, 0, sizeof(fl));
2136
2137 if (tb[RTA_SRC]) {
2138 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2139 goto errout;
2140
2141 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2142 }
2143
2144 if (tb[RTA_DST]) {
2145 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2146 goto errout;
2147
2148 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2149 }
2150
2151 if (tb[RTA_IIF])
2152 iif = nla_get_u32(tb[RTA_IIF]);
2153
2154 if (tb[RTA_OIF])
2155 fl.oif = nla_get_u32(tb[RTA_OIF]);
2156
2157 if (iif) {
2158 struct net_device *dev;
2159 dev = __dev_get_by_index(iif);
2160 if (!dev) {
2161 err = -ENODEV;
2162 goto errout;
2163 }
2164 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165
2166 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002167 if (skb == NULL) {
2168 err = -ENOBUFS;
2169 goto errout;
2170 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171
2172 /* Reserve room for dummy headers, this skb can pass
2173 through good chunk of routing engine.
2174 */
2175 skb->mac.raw = skb->data;
2176 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2177
Thomas Grafab364a62006-08-22 00:01:47 -07002178 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 skb->dst = &rt->u.dst;
2180
Thomas Grafab364a62006-08-22 00:01:47 -07002181 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002183 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002185 kfree_skb(skb);
2186 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 }
2188
Thomas Graf2942e902006-08-15 00:30:25 -07002189 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002190errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192}
2193
Thomas Graf86872cb2006-08-22 00:01:08 -07002194void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195{
2196 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002197 u32 pid = 0, seq = 0;
2198 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002199 int payload = sizeof(struct rtmsg) + 256;
2200 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201
Thomas Graf86872cb2006-08-22 00:01:08 -07002202 if (info) {
2203 pid = info->pid;
2204 nlh = info->nlh;
2205 if (nlh)
2206 seq = nlh->nlmsg_seq;
2207 }
2208
Thomas Graf21713eb2006-08-15 00:35:24 -07002209 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2210 if (skb == NULL)
2211 goto errout;
2212
2213 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2214 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 kfree_skb(skb);
Thomas Graf21713eb2006-08-15 00:35:24 -07002216 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002218
2219 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2220errout:
2221 if (err < 0)
2222 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223}
2224
2225/*
2226 * /proc
2227 */
2228
2229#ifdef CONFIG_PROC_FS
2230
/*
 * Nominal length of one route record as used by the offset arithmetic
 * in rt6_info_route() and rt6_proc_info().
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Walker state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written into @buffer so far */
};
2241
2242static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2243{
2244 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2245 int i;
2246
2247 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2248 arg->skip++;
2249 return 0;
2250 }
2251
2252 if (arg->len >= arg->length)
2253 return 0;
2254
2255 for (i=0; i<16; i++) {
2256 sprintf(arg->buffer + arg->len, "%02x",
2257 rt->rt6i_dst.addr.s6_addr[i]);
2258 arg->len += 2;
2259 }
2260 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2261 rt->rt6i_dst.plen);
2262
2263#ifdef CONFIG_IPV6_SUBTREES
2264 for (i=0; i<16; i++) {
2265 sprintf(arg->buffer + arg->len, "%02x",
2266 rt->rt6i_src.addr.s6_addr[i]);
2267 arg->len += 2;
2268 }
2269 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2270 rt->rt6i_src.plen);
2271#else
2272 sprintf(arg->buffer + arg->len,
2273 "00000000000000000000000000000000 00 ");
2274 arg->len += 36;
2275#endif
2276
2277 if (rt->rt6i_nexthop) {
2278 for (i=0; i<16; i++) {
2279 sprintf(arg->buffer + arg->len, "%02x",
2280 rt->rt6i_nexthop->primary_key[i]);
2281 arg->len += 2;
2282 }
2283 } else {
2284 sprintf(arg->buffer + arg->len,
2285 "00000000000000000000000000000000");
2286 arg->len += 32;
2287 }
2288 arg->len += sprintf(arg->buffer + arg->len,
2289 " %08x %08x %08x %08x %8s\n",
2290 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2291 rt->u.dst.__use, rt->rt6i_flags,
2292 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2293 return 0;
2294}
2295
2296static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2297{
Thomas Grafc71099a2006-08-04 23:20:06 -07002298 struct rt6_proc_arg arg = {
2299 .buffer = buffer,
2300 .offset = offset,
2301 .length = length,
2302 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303
Thomas Grafc71099a2006-08-04 23:20:06 -07002304 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305
2306 *start = buffer;
2307 if (offset)
2308 *start += offset % RT6_INFO_LEN;
2309
2310 arg.len -= offset % RT6_INFO_LEN;
2311
2312 if (arg.len > length)
2313 arg.len = length;
2314 if (arg.len < 0)
2315 arg.len = 0;
2316
2317 return arg.len;
2318}
2319
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2321{
2322 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2323 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2324 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2325 rt6_stats.fib_rt_cache,
2326 atomic_read(&ip6_dst_ops.entries),
2327 rt6_stats.fib_discarded_routes);
2328
2329 return 0;
2330}
2331
2332static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2333{
2334 return single_open(file, rt6_stats_seq_show, NULL);
2335}
2336
2337static struct file_operations rt6_stats_seq_fops = {
2338 .owner = THIS_MODULE,
2339 .open = rt6_stats_seq_open,
2340 .read = seq_read,
2341 .llseek = seq_lseek,
2342 .release = single_release,
2343};
2344#endif /* CONFIG_PROC_FS */
2345
2346#ifdef CONFIG_SYSCTL
2347
2348static int flush_delay;
2349
2350static
2351int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2352 void __user *buffer, size_t *lenp, loff_t *ppos)
2353{
2354 if (write) {
2355 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2356 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2357 return 0;
2358 } else
2359 return -EINVAL;
2360}
2361
2362ctl_table ipv6_route_table[] = {
2363 {
2364 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2365 .procname = "flush",
2366 .data = &flush_delay,
2367 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002368 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 .proc_handler = &ipv6_sysctl_rtcache_flush
2370 },
2371 {
2372 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2373 .procname = "gc_thresh",
2374 .data = &ip6_dst_ops.gc_thresh,
2375 .maxlen = sizeof(int),
2376 .mode = 0644,
2377 .proc_handler = &proc_dointvec,
2378 },
2379 {
2380 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2381 .procname = "max_size",
2382 .data = &ip6_rt_max_size,
2383 .maxlen = sizeof(int),
2384 .mode = 0644,
2385 .proc_handler = &proc_dointvec,
2386 },
2387 {
2388 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2389 .procname = "gc_min_interval",
2390 .data = &ip6_rt_gc_min_interval,
2391 .maxlen = sizeof(int),
2392 .mode = 0644,
2393 .proc_handler = &proc_dointvec_jiffies,
2394 .strategy = &sysctl_jiffies,
2395 },
2396 {
2397 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2398 .procname = "gc_timeout",
2399 .data = &ip6_rt_gc_timeout,
2400 .maxlen = sizeof(int),
2401 .mode = 0644,
2402 .proc_handler = &proc_dointvec_jiffies,
2403 .strategy = &sysctl_jiffies,
2404 },
2405 {
2406 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2407 .procname = "gc_interval",
2408 .data = &ip6_rt_gc_interval,
2409 .maxlen = sizeof(int),
2410 .mode = 0644,
2411 .proc_handler = &proc_dointvec_jiffies,
2412 .strategy = &sysctl_jiffies,
2413 },
2414 {
2415 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2416 .procname = "gc_elasticity",
2417 .data = &ip6_rt_gc_elasticity,
2418 .maxlen = sizeof(int),
2419 .mode = 0644,
2420 .proc_handler = &proc_dointvec_jiffies,
2421 .strategy = &sysctl_jiffies,
2422 },
2423 {
2424 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2425 .procname = "mtu_expires",
2426 .data = &ip6_rt_mtu_expires,
2427 .maxlen = sizeof(int),
2428 .mode = 0644,
2429 .proc_handler = &proc_dointvec_jiffies,
2430 .strategy = &sysctl_jiffies,
2431 },
2432 {
2433 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2434 .procname = "min_adv_mss",
2435 .data = &ip6_rt_min_advmss,
2436 .maxlen = sizeof(int),
2437 .mode = 0644,
2438 .proc_handler = &proc_dointvec_jiffies,
2439 .strategy = &sysctl_jiffies,
2440 },
2441 {
2442 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2443 .procname = "gc_min_interval_ms",
2444 .data = &ip6_rt_gc_min_interval,
2445 .maxlen = sizeof(int),
2446 .mode = 0644,
2447 .proc_handler = &proc_dointvec_ms_jiffies,
2448 .strategy = &sysctl_ms_jiffies,
2449 },
2450 { .ctl_name = 0 }
2451};
2452
2453#endif
2454
2455void __init ip6_route_init(void)
2456{
2457 struct proc_dir_entry *p;
2458
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002459 ip6_dst_ops.kmem_cachep =
2460 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2461 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 fib6_init();
2463#ifdef CONFIG_PROC_FS
2464 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2465 if (p)
2466 p->owner = THIS_MODULE;
2467
2468 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2469#endif
2470#ifdef CONFIG_XFRM
2471 xfrm6_init();
2472#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002473#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2474 fib6_rules_init();
2475#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476}
2477
2478void ip6_route_cleanup(void)
2479{
Thomas Graf101367c2006-08-04 03:39:02 -07002480#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2481 fib6_rules_cleanup();
2482#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483#ifdef CONFIG_PROC_FS
2484 proc_net_remove("ipv6_route");
2485 proc_net_remove("rt6_stats");
2486#endif
2487#ifdef CONFIG_XFRM
2488 xfrm6_fini();
2489#endif
2490 rt6_ifdown(NULL);
2491 fib6_gc_cleanup();
2492 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2493}