blob: 73efdadb9ab89ab3ef98c63137237adaa359c447 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Tracing level for this file.  With a value of 3 or more the
 * RDBG()/RT6_TRACE() calls are compiled in as printk()s; below that
 * they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/*
 * When non-zero, routes that already carry a next hop are cloned with
 * rt6_alloc_clone() on lookup instead of being returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Strictness bits understood by rt6_select() / rt6_score_route(). */
#define RT6_SELECT_F_IFACE 0x1		/* reject routes not matching the interface */
#define RT6_SELECT_F_REACHABLE 0x2	/* reject routes whose next hop is not valid */
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143/* allocate dst with ip6_dst_ops */
144static __inline__ struct rt6_info *ip6_dst_alloc(void)
145{
146 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
147}
148
149static void ip6_dst_destroy(struct dst_entry *dst)
150{
151 struct rt6_info *rt = (struct rt6_info *)dst;
152 struct inet6_dev *idev = rt->rt6i_idev;
153
154 if (idev != NULL) {
155 rt->rt6i_idev = NULL;
156 in6_dev_put(idev);
157 }
158}
159
160static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 int how)
162{
163 struct rt6_info *rt = (struct rt6_info *)dst;
164 struct inet6_dev *idev = rt->rt6i_idev;
165
166 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
167 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
168 if (loopback_idev != NULL) {
169 rt->rt6i_idev = loopback_idev;
170 in6_dev_put(idev);
171 }
172 }
173}
174
175static __inline__ int rt6_check_expired(const struct rt6_info *rt)
176{
177 return (rt->rt6i_flags & RTF_EXPIRES &&
178 time_after(jiffies, rt->rt6i_expires));
179}
180
Thomas Grafc71099a2006-08-04 23:20:06 -0700181static inline int rt6_need_strict(struct in6_addr *daddr)
182{
183 return (ipv6_addr_type(daddr) &
184 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
185}
186
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700188 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 */
190
191static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
192 int oif,
193 int strict)
194{
195 struct rt6_info *local = NULL;
196 struct rt6_info *sprt;
197
198 if (oif) {
199 for (sprt = rt; sprt; sprt = sprt->u.next) {
200 struct net_device *dev = sprt->rt6i_dev;
201 if (dev->ifindex == oif)
202 return sprt;
203 if (dev->flags & IFF_LOOPBACK) {
204 if (sprt->rt6i_idev == NULL ||
205 sprt->rt6i_idev->dev->ifindex != oif) {
206 if (strict && oif)
207 continue;
208 if (local && (!oif ||
209 local->rt6i_idev->dev->ifindex == oif))
210 continue;
211 }
212 local = sprt;
213 }
214 }
215
216 if (local)
217 return local;
218
219 if (strict)
220 return &ip6_null_entry;
221 }
222 return rt;
223}
224
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if @rt's next-hop neighbour is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval
 * ago, stamp it and send a neighbour solicitation to its
 * solicited-node multicast address.  A NULL @rt or a route without a
 * next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check under the neighbour lock and claim this probe slot. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800262 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800264static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800266 struct net_device *dev = rt->rt6i_dev;
267 if (!oif || dev->ifindex == oif)
268 return 2;
269 if ((dev->flags & IFF_LOOPBACK) &&
270 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
271 return 1;
272 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273}
274
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800275static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800277 struct neighbour *neigh = rt->rt6i_nexthop;
278 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700279 if (rt->rt6i_flags & RTF_NONEXTHOP ||
280 !(rt->rt6i_flags & RTF_GATEWAY))
281 m = 1;
282 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800283 read_lock_bh(&neigh->lock);
284 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700285 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800286 read_unlock_bh(&neigh->lock);
287 }
288 return m;
289}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800291static int rt6_score_route(struct rt6_info *rt, int oif,
292 int strict)
293{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700294 int m, n;
295
296 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800297 if (!m && (strict & RT6_SELECT_F_IFACE))
298 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800299#ifdef CONFIG_IPV6_ROUTER_PREF
300 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
301#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700302 n = rt6_check_neigh(rt);
303 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800304 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700305 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800306 return -1;
307 return m;
308}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
311 int strict)
312{
313 struct rt6_info *match = NULL, *last = NULL;
314 struct rt6_info *rt, *rt0 = *head;
315 u32 metric;
316 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800318 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
319 __FUNCTION__, head, head ? *head : NULL, oif);
320
321 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700322 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 rt = rt->u.next) {
324 int m;
325
326 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 continue;
328
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800331 m = rt6_score_route(rt, oif, strict);
332 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800336 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800339 } else {
340 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 }
342 }
343
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 if (!match &&
345 (strict & RT6_SELECT_F_REACHABLE) &&
346 last && last != rt0) {
347 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700348 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700349 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 *head = rt0->u.next;
351 rt0->u.next = last->u.next;
352 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700353 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 }
355
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 RT6_TRACE("%s() => %p, score=%d\n",
357 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360}
361
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option received on @dev from the router
 * @gwaddr.
 *
 * @opt/@len: the raw option and its size in bytes.
 *
 * The option is validated (size, length field vs. prefix length), then
 * the matching route is looked up:
 *   - lifetime 0 removes an existing route;
 *   - a non-zero lifetime creates the route when missing, otherwise
 *     refreshes its preference bits;
 *   - lifetime 0xffffffff means "never expires", any other value sets
 *     the expiry relative to now.
 *
 * Returns 0, or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime comes off the wire: network -> host byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
439
/*
 * Strict-lookup backtracking.  Expects `rt`, `fn` and `flags` plus the
 * labels `restart` and `out` in the calling function.
 *
 * When the lookup produced &ip6_null_entry and RT6_F_STRICT is set,
 * walk up through fn->parent:
 *   - on reaching a node flagged RTN_TL_ROOT, take a reference on rt
 *     and jump to `out`;
 *   - on reaching a node flagged RTN_RTINFO, jump to `restart` with fn
 *     pointing at that node.
 * Otherwise (or when the parent chain ends) execution falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is one statement.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
451
452static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
453 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454{
455 struct fib6_node *fn;
456 struct rt6_info *rt;
457
Thomas Grafc71099a2006-08-04 23:20:06 -0700458 read_lock_bh(&table->tb6_lock);
459 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
460restart:
461 rt = fn->leaf;
462 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
463 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700465out:
466 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467
468 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700469 rt->u.dst.__use++;
470
471 return rt;
472
473}
474
475struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
476 int oif, int strict)
477{
478 struct flowi fl = {
479 .oif = oif,
480 .nl_u = {
481 .ip6_u = {
482 .daddr = *daddr,
483 /* TODO: saddr */
484 },
485 },
486 };
487 struct dst_entry *dst;
488 int flags = strict ? RT6_F_STRICT : 0;
489
490 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
491 if (dst->error == 0)
492 return (struct rt6_info *) dst;
493
494 dst_release(dst);
495
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 return NULL;
497}
498
Thomas Grafc71099a2006-08-04 23:20:06 -0700499/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 It takes new route entry, the addition fails by any reason the
501 route is freed. In any case, if caller does not hold it, it may
502 be destroyed.
503 */
504
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700505int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
506 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507{
508 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700509 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510
Thomas Grafc71099a2006-08-04 23:20:06 -0700511 table = rt->rt6i_table;
512 write_lock_bh(&table->tb6_lock);
513 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
514 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515
516 return err;
517}
518
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800519static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
520 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 struct rt6_info *rt;
523
524 /*
525 * Clone the route.
526 */
527
528 rt = ip6_rt_copy(ort);
529
530 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900531 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
532 if (rt->rt6i_dst.plen != 128 &&
533 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
534 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900536 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900538 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 rt->rt6i_dst.plen = 128;
540 rt->rt6i_flags |= RTF_CACHE;
541 rt->u.dst.flags |= DST_HOST;
542
543#ifdef CONFIG_IPV6_SUBTREES
544 if (rt->rt6i_src.plen && saddr) {
545 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
546 rt->rt6i_src.plen = 128;
547 }
548#endif
549
550 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
551
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800552 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800554 return rt;
555}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800557static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
558{
559 struct rt6_info *rt = ip6_rt_copy(ort);
560 if (rt) {
561 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
562 rt->rt6i_dst.plen = 128;
563 rt->rt6i_flags |= RTF_CACHE;
564 if (rt->rt6i_flags & RTF_REJECT)
565 rt->u.dst.error = ort->u.dst.error;
566 rt->u.dst.flags |= DST_HOST;
567 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
568 }
569 return rt;
570}
571
Thomas Grafc71099a2006-08-04 23:20:06 -0700572struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
573 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574{
575 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800576 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800579 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800580 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
Thomas Grafc71099a2006-08-04 23:20:06 -0700582 if (flags & RT6_F_STRICT)
583 strict = RT6_SELECT_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584
585relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700586 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800588restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700589 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
591restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700592 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800594 if (rt == &ip6_null_entry ||
595 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800596 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800598 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700599 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800600
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800601 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800602 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800603 else {
604#if CLONE_OFFLINK_ROUTE
605 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
606#else
607 goto out2;
608#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800610
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800611 dst_release(&rt->u.dst);
612 rt = nrt ? : &ip6_null_entry;
613
614 dst_hold(&rt->u.dst);
615 if (nrt) {
616 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
617 if (!err)
618 goto out2;
619 }
620
621 if (--attempts <= 0)
622 goto out2;
623
624 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700625 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800626 * released someone could insert this route. Relookup.
627 */
628 dst_release(&rt->u.dst);
629 goto relookup;
630
631out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800632 if (reachable) {
633 reachable = 0;
634 goto restart_2;
635 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800636 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700637 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638out2:
639 rt->u.dst.lastuse = jiffies;
640 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700641
642 return rt;
643}
644
645void ip6_route_input(struct sk_buff *skb)
646{
647 struct ipv6hdr *iph = skb->nh.ipv6h;
648 struct flowi fl = {
649 .iif = skb->dev->ifindex,
650 .nl_u = {
651 .ip6_u = {
652 .daddr = iph->daddr,
653 .saddr = iph->saddr,
654 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
655 },
656 },
657 .proto = iph->nexthdr,
658 };
659 int flags = 0;
660
661 if (rt6_need_strict(&iph->daddr))
662 flags |= RT6_F_STRICT;
663
664 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
665}
666
667static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
668 struct flowi *fl, int flags)
669{
670 struct fib6_node *fn;
671 struct rt6_info *rt, *nrt;
672 int strict = 0;
673 int attempts = 3;
674 int err;
675 int reachable = RT6_SELECT_F_REACHABLE;
676
677 if (flags & RT6_F_STRICT)
678 strict = RT6_SELECT_F_IFACE;
679
680relookup:
681 read_lock_bh(&table->tb6_lock);
682
683restart_2:
684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
685
686restart:
687 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
688 BACKTRACK();
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
691 goto out;
692
693 dst_hold(&rt->u.dst);
694 read_unlock_bh(&table->tb6_lock);
695
696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
698 else {
699#if CLONE_OFFLINK_ROUTE
700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701#else
702 goto out2;
703#endif
704 }
705
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
708
709 dst_hold(&rt->u.dst);
710 if (nrt) {
711 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
712 if (!err)
713 goto out2;
714 }
715
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
720 * Race condition! In the gap, when table->tb6_lock was
721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
731 dst_hold(&rt->u.dst);
732 read_unlock_bh(&table->tb6_lock);
733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
736 return rt;
737}
738
739struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
740{
741 int flags = 0;
742
743 if (rt6_need_strict(&fl->fl6_dst))
744 flags |= RT6_F_STRICT;
745
746 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747}
748
749
750/*
751 * Destination cache support functions
752 */
753
754static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
755{
756 struct rt6_info *rt;
757
758 rt = (struct rt6_info *) dst;
759
760 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
761 return dst;
762
763 return NULL;
764}
765
766static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
767{
768 struct rt6_info *rt = (struct rt6_info *) dst;
769
770 if (rt) {
771 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700772 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 else
774 dst_release(dst);
775 }
776 return NULL;
777}
778
779static void ip6_link_failure(struct sk_buff *skb)
780{
781 struct rt6_info *rt;
782
783 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
784
785 rt = (struct rt6_info *) skb->dst;
786 if (rt) {
787 if (rt->rt6i_flags&RTF_CACHE) {
788 dst_set_expires(&rt->u.dst, 0);
789 rt->rt6i_flags |= RTF_EXPIRES;
790 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
791 rt->rt6i_node->fn_sernum = -1;
792 }
793}
794
795static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
796{
797 struct rt6_info *rt6 = (struct rt6_info*)dst;
798
799 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
800 rt6->rt6i_flags |= RTF_MODIFIED;
801 if (mtu < IPV6_MIN_MTU) {
802 mtu = IPV6_MIN_MTU;
803 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
804 }
805 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700806 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 }
808}
809
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810static int ipv6_get_mtu(struct net_device *dev);
811
812static inline unsigned int ipv6_advmss(unsigned int mtu)
813{
814 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
815
816 if (mtu < ip6_rt_min_advmss)
817 mtu = ip6_rt_min_advmss;
818
819 /*
820 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
821 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
822 * IPV6_MAXPLEN is also valid and means: "any MSS,
823 * rely only on pmtu discovery"
824 */
825 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
826 mtu = IPV6_MAXPLEN;
827 return mtu;
828}
829
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700830static struct dst_entry *ndisc_dst_gc_list;
831DEFINE_SPINLOCK(ndisc_lock);
832
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
834 struct neighbour *neigh,
835 struct in6_addr *addr,
836 int (*output)(struct sk_buff *))
837{
838 struct rt6_info *rt;
839 struct inet6_dev *idev = in6_dev_get(dev);
840
841 if (unlikely(idev == NULL))
842 return NULL;
843
844 rt = ip6_dst_alloc();
845 if (unlikely(rt == NULL)) {
846 in6_dev_put(idev);
847 goto out;
848 }
849
850 dev_hold(dev);
851 if (neigh)
852 neigh_hold(neigh);
853 else
854 neigh = ndisc_get_neigh(dev, addr);
855
856 rt->rt6i_dev = dev;
857 rt->rt6i_idev = idev;
858 rt->rt6i_nexthop = neigh;
859 atomic_set(&rt->u.dst.__refcnt, 1);
860 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
861 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
862 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
863 rt->u.dst.output = output;
864
865#if 0 /* there's no chance to use these for ndisc */
866 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
867 ? DST_HOST
868 : 0;
869 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
870 rt->rt6i_dst.plen = 128;
871#endif
872
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700873 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 rt->u.dst.next = ndisc_dst_gc_list;
875 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700876 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877
878 fib6_force_start_gc();
879
880out:
881 return (struct dst_entry *)rt;
882}
883
884int ndisc_dst_gc(int *more)
885{
886 struct dst_entry *dst, *next, **pprev;
887 int freed;
888
889 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700890 freed = 0;
891
892 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 while ((dst = *pprev) != NULL) {
896 if (!atomic_read(&dst->__refcnt)) {
897 *pprev = dst->next;
898 dst_free(dst);
899 freed++;
900 } else {
901 pprev = &dst->next;
902 (*more)++;
903 }
904 }
905
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700906 spin_unlock_bh(&ndisc_lock);
907
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 return freed;
909}
910
911static int ip6_dst_gc(void)
912{
913 static unsigned expire = 30*HZ;
914 static unsigned long last_gc;
915 unsigned long now = jiffies;
916
917 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
918 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
919 goto out;
920
921 expire++;
922 fib6_run_gc(expire);
923 last_gc = now;
924 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
925 expire = ip6_rt_gc_timeout>>1;
926
927out:
928 expire -= expire>>ip6_rt_gc_elasticity;
929 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
930}
931
932/* Clean host part of a prefix. Not necessary in radix tree,
933 but results in cleaner routing tables.
934
935 Remove it only when all the things will work!
936 */
937
938static int ipv6_get_mtu(struct net_device *dev)
939{
940 int mtu = IPV6_MIN_MTU;
941 struct inet6_dev *idev;
942
943 idev = in6_dev_get(dev);
944 if (idev) {
945 mtu = idev->cnf.mtu6;
946 in6_dev_put(idev);
947 }
948 return mtu;
949}
950
951int ipv6_get_hoplimit(struct net_device *dev)
952{
953 int hoplimit = ipv6_devconf.hop_limit;
954 struct inet6_dev *idev;
955
956 idev = in6_dev_get(dev);
957 if (idev) {
958 hoplimit = idev->cnf.hop_limit;
959 in6_dev_put(idev);
960 }
961 return hoplimit;
962}
963
964/*
965 *
966 */
967
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700968int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
Thomas Grafc71099a2006-08-04 23:20:06 -0700969 void *_rtattr, struct netlink_skb_parms *req,
970 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971{
972 int err;
973 struct rtmsg *r;
974 struct rtattr **rta;
975 struct rt6_info *rt = NULL;
976 struct net_device *dev = NULL;
977 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -0700978 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 int addr_type;
980
981 rta = (struct rtattr **) _rtattr;
982
983 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
984 return -EINVAL;
985#ifndef CONFIG_IPV6_SUBTREES
986 if (rtmsg->rtmsg_src_len)
987 return -EINVAL;
988#endif
989 if (rtmsg->rtmsg_ifindex) {
990 err = -ENODEV;
991 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
992 if (!dev)
993 goto out;
994 idev = in6_dev_get(dev);
995 if (!idev)
996 goto out;
997 }
998
999 if (rtmsg->rtmsg_metric == 0)
1000 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1001
Thomas Grafc71099a2006-08-04 23:20:06 -07001002 table = fib6_new_table(table_id);
1003 if (table == NULL) {
1004 err = -ENOBUFS;
1005 goto out;
1006 }
1007
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 rt = ip6_dst_alloc();
1009
1010 if (rt == NULL) {
1011 err = -ENOMEM;
1012 goto out;
1013 }
1014
1015 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -08001016 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 if (nlh && (r = NLMSG_DATA(nlh))) {
1018 rt->rt6i_protocol = r->rtm_protocol;
1019 } else {
1020 rt->rt6i_protocol = RTPROT_BOOT;
1021 }
1022
1023 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1024
1025 if (addr_type & IPV6_ADDR_MULTICAST)
1026 rt->u.dst.input = ip6_mc_input;
1027 else
1028 rt->u.dst.input = ip6_forward;
1029
1030 rt->u.dst.output = ip6_output;
1031
1032 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1033 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1034 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1035 if (rt->rt6i_dst.plen == 128)
1036 rt->u.dst.flags = DST_HOST;
1037
1038#ifdef CONFIG_IPV6_SUBTREES
1039 ipv6_addr_prefix(&rt->rt6i_src.addr,
1040 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1041 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1042#endif
1043
1044 rt->rt6i_metric = rtmsg->rtmsg_metric;
1045
1046 /* We cannot add true routes via loopback here,
1047 they would result in kernel looping; promote them to reject routes
1048 */
1049 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1050 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1051 /* hold loopback dev/idev if we haven't done so. */
1052 if (dev != &loopback_dev) {
1053 if (dev) {
1054 dev_put(dev);
1055 in6_dev_put(idev);
1056 }
1057 dev = &loopback_dev;
1058 dev_hold(dev);
1059 idev = in6_dev_get(dev);
1060 if (!idev) {
1061 err = -ENODEV;
1062 goto out;
1063 }
1064 }
1065 rt->u.dst.output = ip6_pkt_discard_out;
1066 rt->u.dst.input = ip6_pkt_discard;
1067 rt->u.dst.error = -ENETUNREACH;
1068 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1069 goto install_route;
1070 }
1071
1072 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1073 struct in6_addr *gw_addr;
1074 int gwa_type;
1075
1076 gw_addr = &rtmsg->rtmsg_gateway;
1077 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1078 gwa_type = ipv6_addr_type(gw_addr);
1079
1080 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1081 struct rt6_info *grt;
1082
1083 /* IPv6 strictly inhibits using not link-local
1084 addresses as nexthop address.
1085 Otherwise, router will not able to send redirects.
1086 It is very good, but in some (rare!) circumstances
1087 (SIT, PtP, NBMA NOARP links) it is handy to allow
1088 some exceptions. --ANK
1089 */
1090 err = -EINVAL;
1091 if (!(gwa_type&IPV6_ADDR_UNICAST))
1092 goto out;
1093
1094 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1095
1096 err = -EHOSTUNREACH;
1097 if (grt == NULL)
1098 goto out;
1099 if (dev) {
1100 if (dev != grt->rt6i_dev) {
1101 dst_release(&grt->u.dst);
1102 goto out;
1103 }
1104 } else {
1105 dev = grt->rt6i_dev;
1106 idev = grt->rt6i_idev;
1107 dev_hold(dev);
1108 in6_dev_hold(grt->rt6i_idev);
1109 }
1110 if (!(grt->rt6i_flags&RTF_GATEWAY))
1111 err = 0;
1112 dst_release(&grt->u.dst);
1113
1114 if (err)
1115 goto out;
1116 }
1117 err = -EINVAL;
1118 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1119 goto out;
1120 }
1121
1122 err = -ENODEV;
1123 if (dev == NULL)
1124 goto out;
1125
1126 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1127 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1128 if (IS_ERR(rt->rt6i_nexthop)) {
1129 err = PTR_ERR(rt->rt6i_nexthop);
1130 rt->rt6i_nexthop = NULL;
1131 goto out;
1132 }
1133 }
1134
1135 rt->rt6i_flags = rtmsg->rtmsg_flags;
1136
1137install_route:
1138 if (rta && rta[RTA_METRICS-1]) {
1139 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1140 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1141
1142 while (RTA_OK(attr, attrlen)) {
1143 unsigned flavor = attr->rta_type;
1144 if (flavor) {
1145 if (flavor > RTAX_MAX) {
1146 err = -EINVAL;
1147 goto out;
1148 }
1149 rt->u.dst.metrics[flavor-1] =
1150 *(u32 *)RTA_DATA(attr);
1151 }
1152 attr = RTA_NEXT(attr, attrlen);
1153 }
1154 }
1155
1156 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1157 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1158 if (!rt->u.dst.metrics[RTAX_MTU-1])
1159 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1160 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1161 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1162 rt->u.dst.dev = dev;
1163 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001164 rt->rt6i_table = table;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001165 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166
1167out:
1168 if (dev)
1169 dev_put(dev);
1170 if (idev)
1171 in6_dev_put(idev);
1172 if (rt)
1173 dst_free((struct dst_entry *) rt);
1174 return err;
1175}
1176
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001177int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178{
1179 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001180 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
Thomas Grafc71099a2006-08-04 23:20:06 -07001182 table = rt->rt6i_table;
1183 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001185 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 dst_release(&rt->u.dst);
1187
Thomas Grafc71099a2006-08-04 23:20:06 -07001188 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
1190 return err;
1191}
1192
Thomas Grafc71099a2006-08-04 23:20:06 -07001193static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1194 void *_rtattr, struct netlink_skb_parms *req,
1195 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196{
Thomas Grafc71099a2006-08-04 23:20:06 -07001197 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 struct fib6_node *fn;
1199 struct rt6_info *rt;
1200 int err = -ESRCH;
1201
Thomas Grafc71099a2006-08-04 23:20:06 -07001202 table = fib6_get_table(table_id);
1203 if (table == NULL)
1204 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205
Thomas Grafc71099a2006-08-04 23:20:06 -07001206 read_lock_bh(&table->tb6_lock);
1207
1208 fn = fib6_locate(&table->tb6_root,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1210 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1211
1212 if (fn) {
1213 for (rt = fn->leaf; rt; rt = rt->u.next) {
1214 if (rtmsg->rtmsg_ifindex &&
1215 (rt->rt6i_dev == NULL ||
1216 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1217 continue;
1218 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1219 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1220 continue;
1221 if (rtmsg->rtmsg_metric &&
1222 rtmsg->rtmsg_metric != rt->rt6i_metric)
1223 continue;
1224 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001225 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001227 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 }
1229 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001230 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
1232 return err;
1233}
1234
1235/*
1236 * Handle redirects
1237 */
1238void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1239 struct neighbour *neigh, u8 *lladdr, int on_link)
1240{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001241 struct rt6_info *rt, *nrt = NULL;
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001242 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001243 struct fib6_table *table;
Tom Tucker8d717402006-07-30 20:43:36 -07001244 struct netevent_redirect netevent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
Thomas Grafc71099a2006-08-04 23:20:06 -07001246 /* TODO: Very lazy, might need to check all tables */
1247 table = fib6_get_table(RT6_TABLE_MAIN);
1248 if (table == NULL)
1249 return;
1250
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001252 * Get the "current" route for this destination and
1253 * check if the redirect has come from approriate router.
1254 *
1255 * RFC 2461 specifies that redirects should only be
1256 * accepted if they come from the nexthop to the target.
1257 * Due to the way the routes are chosen, this notion
1258 * is a bit fuzzy and one might need to check all possible
1259 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
Thomas Grafc71099a2006-08-04 23:20:06 -07001262 read_lock_bh(&table->tb6_lock);
1263 fn = fib6_lookup(&table->tb6_root, dest, NULL);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001264restart:
1265 for (rt = fn->leaf; rt; rt = rt->u.next) {
1266 /*
1267 * Current route is on-link; redirect is always invalid.
1268 *
1269 * Seems, previous statement is not true. It could
1270 * be node, which looks for us as on-link (f.e. proxy ndisc)
1271 * But then router serving it might decide, that we should
1272 * know truth 8)8) --ANK (980726).
1273 */
1274 if (rt6_check_expired(rt))
1275 continue;
1276 if (!(rt->rt6i_flags & RTF_GATEWAY))
1277 continue;
1278 if (neigh->dev != rt->rt6i_dev)
1279 continue;
1280 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1281 continue;
1282 break;
1283 }
1284 if (rt)
1285 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001286 else if (rt6_need_strict(dest)) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001287 while ((fn = fn->parent) != NULL) {
1288 if (fn->fn_flags & RTN_ROOT)
1289 break;
1290 if (fn->fn_flags & RTN_RTINFO)
1291 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001293 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001294 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001295
1296 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 if (net_ratelimit())
1298 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1299 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001300 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 /*
1304 * We have finally decided to accept it.
1305 */
1306
1307 neigh_update(neigh, lladdr, NUD_STALE,
1308 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1309 NEIGH_UPDATE_F_OVERRIDE|
1310 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1311 NEIGH_UPDATE_F_ISROUTER))
1312 );
1313
1314 /*
1315 * Redirect received -> path was valid.
1316 * Look, redirects are sent only in response to data packets,
1317 * so that this nexthop apparently is reachable. --ANK
1318 */
1319 dst_confirm(&rt->u.dst);
1320
1321 /* Duplicate redirect: silently ignore. */
1322 if (neigh == rt->u.dst.neighbour)
1323 goto out;
1324
1325 nrt = ip6_rt_copy(rt);
1326 if (nrt == NULL)
1327 goto out;
1328
1329 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1330 if (on_link)
1331 nrt->rt6i_flags &= ~RTF_GATEWAY;
1332
1333 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1334 nrt->rt6i_dst.plen = 128;
1335 nrt->u.dst.flags |= DST_HOST;
1336
1337 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1338 nrt->rt6i_nexthop = neigh_clone(neigh);
1339 /* Reset pmtu, it may be better */
1340 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1341 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1342
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001343 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344 goto out;
1345
Tom Tucker8d717402006-07-30 20:43:36 -07001346 netevent.old = &rt->u.dst;
1347 netevent.new = &nrt->u.dst;
1348 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1349
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001351 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 return;
1353 }
1354
1355out:
1356 dst_release(&rt->u.dst);
1357 return;
1358}
1359
1360/*
1361 * Handle ICMP "packet too big" messages
1362 * i.e. Path MTU discovery
1363 */
1364
1365void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1366 struct net_device *dev, u32 pmtu)
1367{
1368 struct rt6_info *rt, *nrt;
1369 int allfrag = 0;
1370
1371 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1372 if (rt == NULL)
1373 return;
1374
1375 if (pmtu >= dst_mtu(&rt->u.dst))
1376 goto out;
1377
1378 if (pmtu < IPV6_MIN_MTU) {
1379 /*
1380 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1381 * MTU (1280) and a fragment header should always be included
1382 * after a node receiving Too Big message reporting PMTU is
1383 * less than the IPv6 Minimum Link MTU.
1384 */
1385 pmtu = IPV6_MIN_MTU;
1386 allfrag = 1;
1387 }
1388
1389 /* New mtu received -> path was valid.
1390 They are sent only in response to data packets,
1391 so that this nexthop apparently is reachable. --ANK
1392 */
1393 dst_confirm(&rt->u.dst);
1394
1395 /* Host route. If it is static, it would be better
1396 not to override it, but add new one, so that
1397 when cache entry will expire old pmtu
1398 would return automatically.
1399 */
1400 if (rt->rt6i_flags & RTF_CACHE) {
1401 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1402 if (allfrag)
1403 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1404 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1405 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1406 goto out;
1407 }
1408
1409 /* Network route.
1410 Two cases are possible:
1411 1. It is connected route. Action: COW
1412 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1413 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001415 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001416 else
1417 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001418
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001419 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001420 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1421 if (allfrag)
1422 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1423
1424 /* According to RFC 1981, detecting PMTU increase shouldn't be
1425 * happened within 5 mins, the recommended timer is 10 mins.
1426 * Here this route expiration time is set to ip6_rt_mtu_expires
1427 * which is 10 mins. After 10 mins the decreased pmtu is expired
1428 * and detecting PMTU increase will be automatically happened.
1429 */
1430 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1431 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1432
1433 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435out:
1436 dst_release(&rt->u.dst);
1437}
1438
1439/*
1440 * Misc support functions
1441 */
1442
1443static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1444{
1445 struct rt6_info *rt = ip6_dst_alloc();
1446
1447 if (rt) {
1448 rt->u.dst.input = ort->u.dst.input;
1449 rt->u.dst.output = ort->u.dst.output;
1450
1451 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1452 rt->u.dst.dev = ort->u.dst.dev;
1453 if (rt->u.dst.dev)
1454 dev_hold(rt->u.dst.dev);
1455 rt->rt6i_idev = ort->rt6i_idev;
1456 if (rt->rt6i_idev)
1457 in6_dev_hold(rt->rt6i_idev);
1458 rt->u.dst.lastuse = jiffies;
1459 rt->rt6i_expires = 0;
1460
1461 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1462 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1463 rt->rt6i_metric = 0;
1464
1465 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1466#ifdef CONFIG_IPV6_SUBTREES
1467 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1468#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001469 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 }
1471 return rt;
1472}
1473
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001474#ifdef CONFIG_IPV6_ROUTE_INFO
1475static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1476 struct in6_addr *gwaddr, int ifindex)
1477{
1478 struct fib6_node *fn;
1479 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001480 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001481
Thomas Grafc71099a2006-08-04 23:20:06 -07001482 table = fib6_get_table(RT6_TABLE_INFO);
1483 if (table == NULL)
1484 return NULL;
1485
1486 write_lock_bh(&table->tb6_lock);
1487 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001488 if (!fn)
1489 goto out;
1490
1491 for (rt = fn->leaf; rt; rt = rt->u.next) {
1492 if (rt->rt6i_dev->ifindex != ifindex)
1493 continue;
1494 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1495 continue;
1496 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1497 continue;
1498 dst_hold(&rt->u.dst);
1499 break;
1500 }
1501out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001502 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001503 return rt;
1504}
1505
1506static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1507 struct in6_addr *gwaddr, int ifindex,
1508 unsigned pref)
1509{
1510 struct in6_rtmsg rtmsg;
1511
1512 memset(&rtmsg, 0, sizeof(rtmsg));
1513 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1514 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1515 rtmsg.rtmsg_dst_len = prefixlen;
1516 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1517 rtmsg.rtmsg_metric = 1024;
1518 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001519 /* We should treat it as a default route if prefix length is 0. */
1520 if (!prefixlen)
1521 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001522 rtmsg.rtmsg_ifindex = ifindex;
1523
Thomas Grafc71099a2006-08-04 23:20:06 -07001524 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001525
1526 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1527}
1528#endif
1529
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1531{
1532 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001533 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534
Thomas Grafc71099a2006-08-04 23:20:06 -07001535 table = fib6_get_table(RT6_TABLE_DFLT);
1536 if (table == NULL)
1537 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538
Thomas Grafc71099a2006-08-04 23:20:06 -07001539 write_lock_bh(&table->tb6_lock);
1540 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001542 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1544 break;
1545 }
1546 if (rt)
1547 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001548 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 return rt;
1550}
1551
1552struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001553 struct net_device *dev,
1554 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555{
1556 struct in6_rtmsg rtmsg;
1557
1558 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1559 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1560 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1561 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001562 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1563 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564
1565 rtmsg.rtmsg_ifindex = dev->ifindex;
1566
Thomas Grafc71099a2006-08-04 23:20:06 -07001567 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 return rt6_get_dflt_router(gwaddr, dev);
1569}
1570
1571void rt6_purge_dflt_routers(void)
1572{
1573 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001574 struct fib6_table *table;
1575
1576 /* NOTE: Keep consistent with rt6_get_dflt_router */
1577 table = fib6_get_table(RT6_TABLE_DFLT);
1578 if (table == NULL)
1579 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
1581restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001582 read_lock_bh(&table->tb6_lock);
1583 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1585 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001586 read_unlock_bh(&table->tb6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001587 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 goto restart;
1589 }
1590 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001591 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592}
1593
1594int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1595{
1596 struct in6_rtmsg rtmsg;
1597 int err;
1598
1599 switch(cmd) {
1600 case SIOCADDRT: /* Add a route */
1601 case SIOCDELRT: /* Delete a route */
1602 if (!capable(CAP_NET_ADMIN))
1603 return -EPERM;
1604 err = copy_from_user(&rtmsg, arg,
1605 sizeof(struct in6_rtmsg));
1606 if (err)
1607 return -EFAULT;
1608
1609 rtnl_lock();
1610 switch (cmd) {
1611 case SIOCADDRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001612 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1613 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614 break;
1615 case SIOCDELRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001616 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1617 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 break;
1619 default:
1620 err = -EINVAL;
1621 }
1622 rtnl_unlock();
1623
1624 return err;
1625 };
1626
1627 return -EINVAL;
1628}
1629
1630/*
1631 * Drop the packet on the floor
1632 */
1633
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001634static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001636 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1637 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1638 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1639
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1641 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1642 kfree_skb(skb);
1643 return 0;
1644}
1645
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001646static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647{
1648 skb->dev = skb->dst->dev;
1649 return ip6_pkt_discard(skb);
1650}
1651
1652/*
1653 * Allocate a dst for local (unicast / anycast) address.
1654 */
1655
1656struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1657 const struct in6_addr *addr,
1658 int anycast)
1659{
1660 struct rt6_info *rt = ip6_dst_alloc();
1661
1662 if (rt == NULL)
1663 return ERR_PTR(-ENOMEM);
1664
1665 dev_hold(&loopback_dev);
1666 in6_dev_hold(idev);
1667
1668 rt->u.dst.flags = DST_HOST;
1669 rt->u.dst.input = ip6_input;
1670 rt->u.dst.output = ip6_output;
1671 rt->rt6i_dev = &loopback_dev;
1672 rt->rt6i_idev = idev;
1673 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1674 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1675 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1676 rt->u.dst.obsolete = -1;
1677
1678 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001679 if (anycast)
1680 rt->rt6i_flags |= RTF_ANYCAST;
1681 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 rt->rt6i_flags |= RTF_LOCAL;
1683 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1684 if (rt->rt6i_nexthop == NULL) {
1685 dst_free((struct dst_entry *) rt);
1686 return ERR_PTR(-ENOMEM);
1687 }
1688
1689 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1690 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001691 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
1693 atomic_set(&rt->u.dst.__refcnt, 1);
1694
1695 return rt;
1696}
1697
1698static int fib6_ifdown(struct rt6_info *rt, void *arg)
1699{
1700 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1701 rt != &ip6_null_entry) {
1702 RT6_TRACE("deleted by ifdown %p\n", rt);
1703 return -1;
1704 }
1705 return 0;
1706}
1707
1708void rt6_ifdown(struct net_device *dev)
1709{
Thomas Grafc71099a2006-08-04 23:20:06 -07001710 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711}
1712
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU of that device */
};
1718
1719static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1720{
1721 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1722 struct inet6_dev *idev;
1723
1724 /* In IPv6 pmtu discovery is not optional,
1725 so that RTAX_MTU lock cannot disable it.
1726 We still use this lock to block changes
1727 caused by addrconf/ndisc.
1728 */
1729
1730 idev = __in6_dev_get(arg->dev);
1731 if (idev == NULL)
1732 return 0;
1733
1734 /* For administrative MTU increase, there is no way to discover
1735 IPv6 PMTU increase, so PMTU increase should be updated here.
1736 Since RFC 1981 doesn't include administrative MTU increase
1737 update PMTU increase is a MUST. (i.e. jumbo frame)
1738 */
1739 /*
1740 If new MTU is less than route PMTU, this new MTU will be the
1741 lowest MTU in the path, update the route PMTU to reflect PMTU
1742 decreases; if new MTU is greater than route PMTU, and the
1743 old MTU is the lowest MTU in the path, update the route PMTU
1744 to reflect the increase. In this case if the other nodes' MTU
1745 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1746 PMTU discouvery.
1747 */
1748 if (rt->rt6i_dev == arg->dev &&
1749 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1750 (dst_mtu(&rt->u.dst) > arg->mtu ||
1751 (dst_mtu(&rt->u.dst) < arg->mtu &&
1752 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1753 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1754 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1755 return 0;
1756}
1757
1758void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1759{
Thomas Grafc71099a2006-08-04 23:20:06 -07001760 struct rt6_mtu_change_arg arg = {
1761 .dev = dev,
1762 .mtu = mtu,
1763 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764
Thomas Grafc71099a2006-08-04 23:20:06 -07001765 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766}
1767
1768static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1769 struct in6_rtmsg *rtmsg)
1770{
1771 memset(rtmsg, 0, sizeof(*rtmsg));
1772
1773 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1774 rtmsg->rtmsg_src_len = r->rtm_src_len;
1775 rtmsg->rtmsg_flags = RTF_UP;
1776 if (r->rtm_type == RTN_UNREACHABLE)
1777 rtmsg->rtmsg_flags |= RTF_REJECT;
1778
1779 if (rta[RTA_GATEWAY-1]) {
1780 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1781 return -EINVAL;
1782 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1783 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1784 }
1785 if (rta[RTA_DST-1]) {
1786 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1787 return -EINVAL;
1788 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1789 }
1790 if (rta[RTA_SRC-1]) {
1791 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1792 return -EINVAL;
1793 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1794 }
1795 if (rta[RTA_OIF-1]) {
1796 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1797 return -EINVAL;
1798 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1799 }
1800 if (rta[RTA_PRIORITY-1]) {
1801 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1802 return -EINVAL;
1803 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1804 }
1805 return 0;
1806}
1807
1808int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1809{
1810 struct rtmsg *r = NLMSG_DATA(nlh);
1811 struct in6_rtmsg rtmsg;
1812
1813 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1814 return -EINVAL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001815 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816}
1817
1818int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1819{
1820 struct rtmsg *r = NLMSG_DATA(nlh);
1821 struct in6_rtmsg rtmsg;
1822
1823 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1824 return -EINVAL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001825 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826}
1827
/* Context handed to rt6_dump_route() for each route being dumped. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* callback state of the dump request */
};
1833
1834static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001835 struct in6_addr *dst, struct in6_addr *src,
1836 int iif, int type, u32 pid, u32 seq,
1837 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838{
1839 struct rtmsg *rtm;
1840 struct nlmsghdr *nlh;
1841 unsigned char *b = skb->tail;
1842 struct rta_cacheinfo ci;
1843
1844 if (prefix) { /* user wants prefix routes only */
1845 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1846 /* success since this is not a prefix route */
1847 return 1;
1848 }
1849 }
1850
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001851 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 rtm = NLMSG_DATA(nlh);
1853 rtm->rtm_family = AF_INET6;
1854 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1855 rtm->rtm_src_len = rt->rt6i_src.plen;
1856 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001857 if (rt->rt6i_table)
1858 rtm->rtm_table = rt->rt6i_table->tb6_id;
1859 else
1860 rtm->rtm_table = RT6_TABLE_UNSPEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861 rtm->rtm_table = RT_TABLE_MAIN;
1862 if (rt->rt6i_flags&RTF_REJECT)
1863 rtm->rtm_type = RTN_UNREACHABLE;
1864 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1865 rtm->rtm_type = RTN_LOCAL;
1866 else
1867 rtm->rtm_type = RTN_UNICAST;
1868 rtm->rtm_flags = 0;
1869 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1870 rtm->rtm_protocol = rt->rt6i_protocol;
1871 if (rt->rt6i_flags&RTF_DYNAMIC)
1872 rtm->rtm_protocol = RTPROT_REDIRECT;
1873 else if (rt->rt6i_flags & RTF_ADDRCONF)
1874 rtm->rtm_protocol = RTPROT_KERNEL;
1875 else if (rt->rt6i_flags&RTF_DEFAULT)
1876 rtm->rtm_protocol = RTPROT_RA;
1877
1878 if (rt->rt6i_flags&RTF_CACHE)
1879 rtm->rtm_flags |= RTM_F_CLONED;
1880
1881 if (dst) {
1882 RTA_PUT(skb, RTA_DST, 16, dst);
1883 rtm->rtm_dst_len = 128;
1884 } else if (rtm->rtm_dst_len)
1885 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1886#ifdef CONFIG_IPV6_SUBTREES
1887 if (src) {
1888 RTA_PUT(skb, RTA_SRC, 16, src);
1889 rtm->rtm_src_len = 128;
1890 } else if (rtm->rtm_src_len)
1891 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1892#endif
1893 if (iif)
1894 RTA_PUT(skb, RTA_IIF, 4, &iif);
1895 else if (dst) {
1896 struct in6_addr saddr_buf;
1897 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1898 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1899 }
1900 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1901 goto rtattr_failure;
1902 if (rt->u.dst.neighbour)
1903 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1904 if (rt->u.dst.dev)
1905 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1906 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1907 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1908 if (rt->rt6i_expires)
1909 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1910 else
1911 ci.rta_expires = 0;
1912 ci.rta_used = rt->u.dst.__use;
1913 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1914 ci.rta_error = rt->u.dst.error;
1915 ci.rta_id = 0;
1916 ci.rta_ts = 0;
1917 ci.rta_tsage = 0;
1918 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1919 nlh->nlmsg_len = skb->tail - b;
1920 return skb->len;
1921
1922nlmsg_failure:
1923rtattr_failure:
1924 skb_trim(skb, b - skb->data);
1925 return -1;
1926}
1927
1928static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1929{
1930 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1931 int prefix;
1932
1933 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1934 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1935 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1936 } else
1937 prefix = 0;
1938
1939 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1940 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001941 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942}
1943
1944static int fib6_dump_node(struct fib6_walker_t *w)
1945{
1946 int res;
1947 struct rt6_info *rt;
1948
1949 for (rt = w->leaf; rt; rt = rt->u.next) {
1950 res = rt6_dump_route(rt, w->args);
1951 if (res < 0) {
1952 /* Frame is full, suspend walking */
1953 w->leaf = rt;
1954 return 1;
1955 }
1956 BUG_TRAP(res!=0);
1957 }
1958 w->leaf = NULL;
1959 return 0;
1960}
1961
1962static void fib6_dump_end(struct netlink_callback *cb)
1963{
1964 struct fib6_walker_t *w = (void*)cb->args[0];
1965
1966 if (w) {
1967 cb->args[0] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 kfree(w);
1969 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001970 cb->done = (void*)cb->args[1];
1971 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972}
1973
1974static int fib6_dump_done(struct netlink_callback *cb)
1975{
1976 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001977 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978}
1979
1980int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1981{
Thomas Grafc71099a2006-08-04 23:20:06 -07001982 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 struct rt6_rtnl_dump_arg arg;
1984 struct fib6_walker_t *w;
Thomas Grafc71099a2006-08-04 23:20:06 -07001985 int i, res = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986
1987 arg.skb = skb;
1988 arg.cb = cb;
1989
Thomas Grafc71099a2006-08-04 23:20:06 -07001990 /*
1991 * cb->args[0] = pointer to walker structure
1992 * cb->args[1] = saved cb->done() pointer
1993 * cb->args[2] = current table being dumped
1994 */
1995
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996 w = (void*)cb->args[0];
1997 if (w == NULL) {
1998 /* New dump:
1999 *
2000 * 1. hook callback destructor.
2001 */
2002 cb->args[1] = (long)cb->done;
2003 cb->done = fib6_dump_done;
2004
2005 /*
2006 * 2. allocate and initialize walker.
2007 */
Ingo Oeser0c600ed2006-03-20 23:01:32 -08002008 w = kzalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 if (w == NULL)
2010 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 w->func = fib6_dump_node;
2012 w->args = &arg;
2013 cb->args[0] = (long)w;
Thomas Grafc71099a2006-08-04 23:20:06 -07002014 cb->args[2] = FIB6_TABLE_MIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 } else {
2016 w->args = &arg;
Thomas Grafc71099a2006-08-04 23:20:06 -07002017 i = cb->args[2];
2018 if (i > FIB6_TABLE_MAX)
2019 goto end;
2020
2021 table = fib6_get_table(i);
2022 if (table != NULL) {
2023 read_lock_bh(&table->tb6_lock);
2024 w->root = &table->tb6_root;
2025 res = fib6_walk_continue(w);
2026 read_unlock_bh(&table->tb6_lock);
2027 if (res != 0) {
2028 if (res < 0)
2029 fib6_walker_unlink(w);
2030 goto end;
2031 }
2032 }
2033
2034 fib6_walker_unlink(w);
2035 cb->args[2] = ++i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 }
Thomas Grafc71099a2006-08-04 23:20:06 -07002037
2038 for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
2039 table = fib6_get_table(i);
2040 if (table == NULL)
2041 continue;
2042
2043 read_lock_bh(&table->tb6_lock);
2044 w->root = &table->tb6_root;
2045 res = fib6_walk(w);
2046 read_unlock_bh(&table->tb6_lock);
2047 if (res)
2048 break;
2049 }
2050end:
2051 cb->args[2] = i;
2052
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 res = res < 0 ? res : skb->len;
2054 /* res < 0 is an error. (really, impossible)
2055 res == 0 means that dump is complete, but skb still can contain data.
2056 res > 0 dump is not complete, but frame is full.
2057 */
2058 /* Destroy walker, if dump of this table is complete. */
2059 if (res <= 0)
2060 fib6_dump_end(cb);
2061 return res;
2062}
2063
2064int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2065{
2066 struct rtattr **rta = arg;
2067 int iif = 0;
2068 int err = -ENOBUFS;
2069 struct sk_buff *skb;
2070 struct flowi fl;
2071 struct rt6_info *rt;
2072
2073 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2074 if (skb == NULL)
2075 goto out;
2076
2077 /* Reserve room for dummy headers, this skb can pass
2078 through good chunk of routing engine.
2079 */
2080 skb->mac.raw = skb->data;
2081 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2082
2083 memset(&fl, 0, sizeof(fl));
2084 if (rta[RTA_SRC-1])
2085 ipv6_addr_copy(&fl.fl6_src,
2086 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2087 if (rta[RTA_DST-1])
2088 ipv6_addr_copy(&fl.fl6_dst,
2089 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2090
2091 if (rta[RTA_IIF-1])
2092 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2093
2094 if (iif) {
2095 struct net_device *dev;
2096 dev = __dev_get_by_index(iif);
2097 if (!dev) {
2098 err = -ENODEV;
2099 goto out_free;
2100 }
2101 }
2102
2103 fl.oif = 0;
2104 if (rta[RTA_OIF-1])
2105 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2106
2107 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2108
2109 skb->dst = &rt->u.dst;
2110
2111 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2112 err = rt6_fill_node(skb, rt,
2113 &fl.fl6_dst, &fl.fl6_src,
2114 iif,
2115 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002116 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 if (err < 0) {
2118 err = -EMSGSIZE;
2119 goto out_free;
2120 }
2121
2122 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2123 if (err > 0)
2124 err = 0;
2125out:
2126 return err;
2127out_free:
2128 kfree_skb(skb);
2129 goto out;
2130}
2131
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002132void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2133 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134{
2135 struct sk_buff *skb;
2136 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002137 u32 pid = current->pid;
2138 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002140 if (req)
2141 pid = req->pid;
2142 if (nlh)
2143 seq = nlh->nlmsg_seq;
2144
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 skb = alloc_skb(size, gfp_any());
2146 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07002147 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148 return;
2149 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002150 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002152 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 return;
2154 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002155 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2156 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157}
2158
2159/*
2160 * /proc
2161 */
2162
2163#ifdef CONFIG_PROC_FS
2164
/* Record size used for all offset arithmetic on the route listing. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* records skipped so far to reach @offset */
	int len;	/* bytes written to @buffer so far */
};
2175
2176static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2177{
2178 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2179 int i;
2180
2181 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2182 arg->skip++;
2183 return 0;
2184 }
2185
2186 if (arg->len >= arg->length)
2187 return 0;
2188
2189 for (i=0; i<16; i++) {
2190 sprintf(arg->buffer + arg->len, "%02x",
2191 rt->rt6i_dst.addr.s6_addr[i]);
2192 arg->len += 2;
2193 }
2194 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2195 rt->rt6i_dst.plen);
2196
2197#ifdef CONFIG_IPV6_SUBTREES
2198 for (i=0; i<16; i++) {
2199 sprintf(arg->buffer + arg->len, "%02x",
2200 rt->rt6i_src.addr.s6_addr[i]);
2201 arg->len += 2;
2202 }
2203 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2204 rt->rt6i_src.plen);
2205#else
2206 sprintf(arg->buffer + arg->len,
2207 "00000000000000000000000000000000 00 ");
2208 arg->len += 36;
2209#endif
2210
2211 if (rt->rt6i_nexthop) {
2212 for (i=0; i<16; i++) {
2213 sprintf(arg->buffer + arg->len, "%02x",
2214 rt->rt6i_nexthop->primary_key[i]);
2215 arg->len += 2;
2216 }
2217 } else {
2218 sprintf(arg->buffer + arg->len,
2219 "00000000000000000000000000000000");
2220 arg->len += 32;
2221 }
2222 arg->len += sprintf(arg->buffer + arg->len,
2223 " %08x %08x %08x %08x %8s\n",
2224 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2225 rt->u.dst.__use, rt->rt6i_flags,
2226 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2227 return 0;
2228}
2229
2230static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2231{
Thomas Grafc71099a2006-08-04 23:20:06 -07002232 struct rt6_proc_arg arg = {
2233 .buffer = buffer,
2234 .offset = offset,
2235 .length = length,
2236 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237
Thomas Grafc71099a2006-08-04 23:20:06 -07002238 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239
2240 *start = buffer;
2241 if (offset)
2242 *start += offset % RT6_INFO_LEN;
2243
2244 arg.len -= offset % RT6_INFO_LEN;
2245
2246 if (arg.len > length)
2247 arg.len = length;
2248 if (arg.len < 0)
2249 arg.len = 0;
2250
2251 return arg.len;
2252}
2253
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2255{
2256 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2257 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2258 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2259 rt6_stats.fib_rt_cache,
2260 atomic_read(&ip6_dst_ops.entries),
2261 rt6_stats.fib_discarded_routes);
2262
2263 return 0;
2264}
2265
2266static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2267{
2268 return single_open(file, rt6_stats_seq_show, NULL);
2269}
2270
2271static struct file_operations rt6_stats_seq_fops = {
2272 .owner = THIS_MODULE,
2273 .open = rt6_stats_seq_open,
2274 .read = seq_read,
2275 .llseek = seq_lseek,
2276 .release = single_release,
2277};
2278#endif /* CONFIG_PROC_FS */
2279
2280#ifdef CONFIG_SYSCTL
2281
2282static int flush_delay;
2283
2284static
2285int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2286 void __user *buffer, size_t *lenp, loff_t *ppos)
2287{
2288 if (write) {
2289 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2290 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2291 return 0;
2292 } else
2293 return -EINVAL;
2294}
2295
2296ctl_table ipv6_route_table[] = {
2297 {
2298 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2299 .procname = "flush",
2300 .data = &flush_delay,
2301 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002302 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 .proc_handler = &ipv6_sysctl_rtcache_flush
2304 },
2305 {
2306 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2307 .procname = "gc_thresh",
2308 .data = &ip6_dst_ops.gc_thresh,
2309 .maxlen = sizeof(int),
2310 .mode = 0644,
2311 .proc_handler = &proc_dointvec,
2312 },
2313 {
2314 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2315 .procname = "max_size",
2316 .data = &ip6_rt_max_size,
2317 .maxlen = sizeof(int),
2318 .mode = 0644,
2319 .proc_handler = &proc_dointvec,
2320 },
2321 {
2322 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2323 .procname = "gc_min_interval",
2324 .data = &ip6_rt_gc_min_interval,
2325 .maxlen = sizeof(int),
2326 .mode = 0644,
2327 .proc_handler = &proc_dointvec_jiffies,
2328 .strategy = &sysctl_jiffies,
2329 },
2330 {
2331 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2332 .procname = "gc_timeout",
2333 .data = &ip6_rt_gc_timeout,
2334 .maxlen = sizeof(int),
2335 .mode = 0644,
2336 .proc_handler = &proc_dointvec_jiffies,
2337 .strategy = &sysctl_jiffies,
2338 },
2339 {
2340 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2341 .procname = "gc_interval",
2342 .data = &ip6_rt_gc_interval,
2343 .maxlen = sizeof(int),
2344 .mode = 0644,
2345 .proc_handler = &proc_dointvec_jiffies,
2346 .strategy = &sysctl_jiffies,
2347 },
2348 {
2349 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2350 .procname = "gc_elasticity",
2351 .data = &ip6_rt_gc_elasticity,
2352 .maxlen = sizeof(int),
2353 .mode = 0644,
2354 .proc_handler = &proc_dointvec_jiffies,
2355 .strategy = &sysctl_jiffies,
2356 },
2357 {
2358 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2359 .procname = "mtu_expires",
2360 .data = &ip6_rt_mtu_expires,
2361 .maxlen = sizeof(int),
2362 .mode = 0644,
2363 .proc_handler = &proc_dointvec_jiffies,
2364 .strategy = &sysctl_jiffies,
2365 },
2366 {
2367 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2368 .procname = "min_adv_mss",
2369 .data = &ip6_rt_min_advmss,
2370 .maxlen = sizeof(int),
2371 .mode = 0644,
2372 .proc_handler = &proc_dointvec_jiffies,
2373 .strategy = &sysctl_jiffies,
2374 },
2375 {
2376 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2377 .procname = "gc_min_interval_ms",
2378 .data = &ip6_rt_gc_min_interval,
2379 .maxlen = sizeof(int),
2380 .mode = 0644,
2381 .proc_handler = &proc_dointvec_ms_jiffies,
2382 .strategy = &sysctl_ms_jiffies,
2383 },
2384 { .ctl_name = 0 }
2385};
2386
2387#endif
2388
2389void __init ip6_route_init(void)
2390{
2391 struct proc_dir_entry *p;
2392
2393 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2394 sizeof(struct rt6_info),
2395 0, SLAB_HWCACHE_ALIGN,
2396 NULL, NULL);
2397 if (!ip6_dst_ops.kmem_cachep)
2398 panic("cannot create ip6_dst_cache");
2399
2400 fib6_init();
2401#ifdef CONFIG_PROC_FS
2402 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2403 if (p)
2404 p->owner = THIS_MODULE;
2405
2406 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2407#endif
2408#ifdef CONFIG_XFRM
2409 xfrm6_init();
2410#endif
2411}
2412
2413void ip6_route_cleanup(void)
2414{
2415#ifdef CONFIG_PROC_FS
2416 proc_net_remove("ipv6_route");
2417 proc_net_remove("rt6_stats");
2418#endif
2419#ifdef CONFIG_XFRM
2420 xfrm6_fini();
2421#endif
2422 rt6_ifdown(NULL);
2423 fib6_gc_cleanup();
2424 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2425}