blob: fd44721abebbbf0b616228a4ca45054331cfd13a [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both macros expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG() takes a complete, parenthesised printk() argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() uses rt6_alloc_clone() for routes it does
 * not copy-on-write; when zero it returns such routes unchanged.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
91static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
95 struct in6_addr *gwaddr, int ifindex);
96#endif
97
Linus Torvalds1da177e2005-04-16 15:20:36 -070098static struct dst_ops ip6_dst_ops = {
99 .family = AF_INET6,
100 .protocol = __constant_htons(ETH_P_IPV6),
101 .gc = ip6_dst_gc,
102 .gc_thresh = 1024,
103 .check = ip6_dst_check,
104 .destroy = ip6_dst_destroy,
105 .ifdown = ip6_dst_ifdown,
106 .negative_advice = ip6_negative_advice,
107 .link_failure = ip6_link_failure,
108 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu862b82c2007-11-13 21:43:11 -0800109 .local_out = ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800111 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800125 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700126};
127
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128struct rt6_info ip6_null_entry = {
129 .u = {
130 .dst = {
131 .__refcnt = ATOMIC_INIT(1),
132 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 .obsolete = -1,
134 .error = -ENETUNREACH,
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
136 .input = ip6_pkt_discard,
137 .output = ip6_pkt_discard_out,
138 .ops = &ip6_dst_ops,
139 .path = (struct dst_entry*)&ip6_null_entry,
140 }
141 },
142 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
143 .rt6i_metric = ~(u32) 0,
144 .rt6i_ref = ATOMIC_INIT(1),
145};
146
Thomas Graf101367c2006-08-04 03:39:02 -0700147#ifdef CONFIG_IPV6_MULTIPLE_TABLES
148
David S. Miller6723ab52006-10-18 21:20:57 -0700149static int ip6_pkt_prohibit(struct sk_buff *skb);
150static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700151
Thomas Graf101367c2006-08-04 03:39:02 -0700152struct rt6_info ip6_prohibit_entry = {
153 .u = {
154 .dst = {
155 .__refcnt = ATOMIC_INIT(1),
156 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700157 .obsolete = -1,
158 .error = -EACCES,
159 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700160 .input = ip6_pkt_prohibit,
161 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700162 .ops = &ip6_dst_ops,
163 .path = (struct dst_entry*)&ip6_prohibit_entry,
164 }
165 },
166 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
167 .rt6i_metric = ~(u32) 0,
168 .rt6i_ref = ATOMIC_INIT(1),
169};
170
171struct rt6_info ip6_blk_hole_entry = {
172 .u = {
173 .dst = {
174 .__refcnt = ATOMIC_INIT(1),
175 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700176 .obsolete = -1,
177 .error = -EINVAL,
178 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800179 .input = dst_discard,
180 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700181 .ops = &ip6_dst_ops,
182 .path = (struct dst_entry*)&ip6_blk_hole_entry,
183 }
184 },
185 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
186 .rt6i_metric = ~(u32) 0,
187 .rt6i_ref = ATOMIC_INIT(1),
188};
189
190#endif
191
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192/* allocate dst with ip6_dst_ops */
193static __inline__ struct rt6_info *ip6_dst_alloc(void)
194{
195 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
196}
197
198static void ip6_dst_destroy(struct dst_entry *dst)
199{
200 struct rt6_info *rt = (struct rt6_info *)dst;
201 struct inet6_dev *idev = rt->rt6i_idev;
202
203 if (idev != NULL) {
204 rt->rt6i_idev = NULL;
205 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900206 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207}
208
209static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
210 int how)
211{
212 struct rt6_info *rt = (struct rt6_info *)dst;
213 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800214 struct net_device *loopback_dev =
215 dev->nd_net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800217 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
218 struct inet6_dev *loopback_idev =
219 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 if (loopback_idev != NULL) {
221 rt->rt6i_idev = loopback_idev;
222 in6_dev_put(idev);
223 }
224 }
225}
226
227static __inline__ int rt6_check_expired(const struct rt6_info *rt)
228{
229 return (rt->rt6i_flags & RTF_EXPIRES &&
230 time_after(jiffies, rt->rt6i_expires));
231}
232
Thomas Grafc71099a2006-08-04 23:20:06 -0700233static inline int rt6_need_strict(struct in6_addr *daddr)
234{
235 return (ipv6_addr_type(daddr) &
236 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
237}
238
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700240 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 */
242
243static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
244 int oif,
245 int strict)
246{
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
249
250 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
254 return sprt;
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
258 if (strict && oif)
259 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900260 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 local->rt6i_idev->dev->ifindex == oif))
262 continue;
263 }
264 local = sprt;
265 }
266 }
267
268 if (local)
269 return local;
270
271 if (strict)
272 return &ip6_null_entry;
273 }
274 return rt;
275}
276
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if @rt has a nexthop neighbour that is not
 * in a NUD_VALID state and was last updated more than rtr_probe_interval
 * ago, stamp it with the current jiffies and send a Neighbour Solicitation
 * to its solicited-node multicast address.
 *
 * Original note: Router Reachability Probe MUST be rate-limited to no
 * more than one per minute.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked pre-check */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation is sent after the lock has been released */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
312
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800314 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700316static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800318 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700319 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
323 return 1;
324 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325}
326
Dave Jonesb6f99a22007-03-22 12:27:49 -0700327static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800330 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
333 m = 1;
334 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800338#ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
340 m = 0;
341#endif
342 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800343 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800345 } else
346 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800347 return m;
348}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350static int rt6_score_route(struct rt6_info *rt, int oif,
351 int strict)
352{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700353 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900354
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700355 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800357 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800358#ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
360#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 return -1;
364 return m;
365}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366
David S. Millerf11e6652007-03-24 20:36:25 -0700367static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369{
David S. Millerf11e6652007-03-24 20:36:25 -0700370 int m;
371
372 if (rt6_check_expired(rt))
373 goto out;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 goto out;
378
379 if (m > *mpri) {
380 if (strict & RT6_LOOKUP_F_REACHABLE)
381 rt6_probe(match);
382 *mpri = m;
383 match = rt;
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
385 rt6_probe(rt);
386 }
387
388out:
389 return match;
390}
391
392static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
395{
396 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800397 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
David S. Millerf11e6652007-03-24 20:36:25 -0700399 match = NULL;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406
David S. Millerf11e6652007-03-24 20:36:25 -0700407 return match;
408}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800409
David S. Millerf11e6652007-03-24 20:36:25 -0700410static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
411{
412 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
David S. Millerf11e6652007-03-24 20:36:25 -0700414 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
415 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
David S. Millerf11e6652007-03-24 20:36:25 -0700417 rt0 = fn->rr_ptr;
418 if (!rt0)
419 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
David S. Millerf11e6652007-03-24 20:36:25 -0700421 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800423 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700424 (strict & RT6_LOOKUP_F_REACHABLE)) {
425 struct rt6_info *next = rt0->u.dst.rt6_next;
426
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700428 if (!next || next->rt6i_metric != rt0->rt6i_metric)
429 next = fn->leaf;
430
431 if (next != rt0)
432 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
434
David S. Millerf11e6652007-03-24 20:36:25 -0700435 RT6_TRACE("%s() => %p\n",
436 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800438 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439}
440
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the corresponding route-info route via rt6_get_route_info(),
 * rt6_add_route_info() and ip6_del_rt().
 *
 * Returns 0 when the option was well-formed (even if no route could be
 * added) and -EINVAL when it is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	int min_length;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Compare as signed: against the unsigned sizeof a negative len
	 * would be converted to a huge value and slip through.
	 */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.  The option length counts
	 * 8-octet units including the 8-octet header, so the prefix bytes
	 * actually present are (length - 1) * 8.  Per RFC 4191 section 2.3:
	 * prefix_len > 64 needs length 3, prefix_len > 0 needs length >= 2,
	 * and length is never outside 1..3.  Anything shorter would make
	 * the prefix copy below read past the end of the option.
	 */
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64)
		min_length = 3;
	else if (rinfo->prefix_len > 0)
		min_length = 2;
	else
		min_length = 1;
	if (rinfo->length < min_length || rinfo->length > 3)
		return -EINVAL;
	/* the declared option length must fit in the buffer we were given */
	if (len < rinfo->length * 8)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	/* 0xffffffff means infinity and is left alone */
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Fewer than 16 prefix bytes are present: build a full
		 * address from the first prefix_len bits only.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* a zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
518
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed selection.
 *
 * Expands inside a lookup function and relies on that function's local
 * variables 'rt' and 'fn' and on its labels 'out' and 'restart'.
 *
 * Does nothing unless rt is &ip6_null_entry.  Otherwise it walks towards
 * the root: at each step it either descends into the parent's subtree
 * (looked up with @saddr) or moves to the parent, jumping to 'restart' as
 * soon as a node carrying RTN_RTINFO is reached and to 'out' when the
 * current node is flagged RTN_TL_ROOT.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700536
537static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
538 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700547 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700548 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700549out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800550 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 return rt;
553
554}
555
556struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
557 int oif, int strict)
558{
559 struct flowi fl = {
560 .oif = oif,
561 .nl_u = {
562 .ip6_u = {
563 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700564 },
565 },
566 };
567 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700569
Thomas Grafadaa70b2006-10-13 15:01:03 -0700570 if (saddr) {
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
573 }
574
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800575 dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700576 if (dst->error == 0)
577 return (struct rt6_info *) dst;
578
579 dst_release(dst);
580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 return NULL;
582}
583
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900584EXPORT_SYMBOL(rt6_lookup);
585
Thomas Grafc71099a2006-08-04 23:20:06 -0700586/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 It takes new route entry, the addition fails by any reason the
588 route is freed. In any case, if caller does not hold it, it may
589 be destroyed.
590 */
591
Thomas Graf86872cb2006-08-22 00:01:08 -0700592static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593{
594 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700595 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
Thomas Grafc71099a2006-08-04 23:20:06 -0700597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700599 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
602 return err;
603}
604
Thomas Graf40e22e82006-08-22 00:00:45 -0700605int ip6_ins_rt(struct rt6_info *rt)
606{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800607 struct nl_info info = {
608 .nl_net = &init_net,
609 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800610 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700611}
612
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800613static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 struct rt6_info *rt;
617
618 /*
619 * Clone the route.
620 */
621
622 rt = ip6_rt_copy(ort);
623
624 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900625 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
626 if (rt->rt6i_dst.plen != 128 &&
627 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
628 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900630 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
635 rt->u.dst.flags |= DST_HOST;
636
637#ifdef CONFIG_IPV6_SUBTREES
638 if (rt->rt6i_src.plen && saddr) {
639 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
640 rt->rt6i_src.plen = 128;
641 }
642#endif
643
644 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
645
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800646 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800648 return rt;
649}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800651static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
652{
653 struct rt6_info *rt = ip6_rt_copy(ort);
654 if (rt) {
655 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
656 rt->rt6i_dst.plen = 128;
657 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800658 rt->u.dst.flags |= DST_HOST;
659 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
660 }
661 return rt;
662}
663
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700664static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700665 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666{
667 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800668 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700669 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800671 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800672 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700674 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675
676relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700677 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800679restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700680 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
682restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700683 rt = rt6_select(fn, oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700684 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800685 if (rt == &ip6_null_entry ||
686 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800687 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800689 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700690 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800691
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800692 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800693 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800694 else {
695#if CLONE_OFFLINK_ROUTE
696 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
697#else
698 goto out2;
699#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800701
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800702 dst_release(&rt->u.dst);
703 rt = nrt ? : &ip6_null_entry;
704
705 dst_hold(&rt->u.dst);
706 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700707 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800708 if (!err)
709 goto out2;
710 }
711
712 if (--attempts <= 0)
713 goto out2;
714
715 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800717 * released someone could insert this route. Relookup.
718 */
719 dst_release(&rt->u.dst);
720 goto relookup;
721
722out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800723 if (reachable) {
724 reachable = 0;
725 goto restart_2;
726 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800727 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700728 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729out2:
730 rt->u.dst.lastuse = jiffies;
731 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700732
733 return rt;
734}
735
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700736static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
737 struct flowi *fl, int flags)
738{
739 return ip6_pol_route(table, fl->iif, fl, flags);
740}
741
Thomas Grafc71099a2006-08-04 23:20:06 -0700742void ip6_route_input(struct sk_buff *skb)
743{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700744 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700745 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700746 struct flowi fl = {
747 .iif = skb->dev->ifindex,
748 .nl_u = {
749 .ip6_u = {
750 .daddr = iph->daddr,
751 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800752 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700753 },
754 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900755 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 .proto = iph->nexthdr,
757 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700758
759 if (rt6_need_strict(&iph->daddr))
760 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700761
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800762 skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700763}
764
765static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
766 struct flowi *fl, int flags)
767{
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700768 return ip6_pol_route(table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700769}
770
771struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
772{
773 int flags = 0;
774
775 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700776 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700777
Thomas Grafadaa70b2006-10-13 15:01:03 -0700778 if (!ipv6_addr_any(&fl->fl6_src))
779 flags |= RT6_LOOKUP_F_HAS_SADDR;
780
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800781 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782}
783
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900784EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785
David S. Miller14e50e52007-05-24 18:17:54 -0700786int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
787{
788 struct rt6_info *ort = (struct rt6_info *) *dstp;
789 struct rt6_info *rt = (struct rt6_info *)
790 dst_alloc(&ip6_dst_blackhole_ops);
791 struct dst_entry *new = NULL;
792
793 if (rt) {
794 new = &rt->u.dst;
795
796 atomic_set(&new->__refcnt, 1);
797 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800798 new->input = dst_discard;
799 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700800
801 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
802 new->dev = ort->u.dst.dev;
803 if (new->dev)
804 dev_hold(new->dev);
805 rt->rt6i_idev = ort->rt6i_idev;
806 if (rt->rt6i_idev)
807 in6_dev_hold(rt->rt6i_idev);
808 rt->rt6i_expires = 0;
809
810 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
811 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
812 rt->rt6i_metric = 0;
813
814 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
815#ifdef CONFIG_IPV6_SUBTREES
816 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
817#endif
818
819 dst_free(new);
820 }
821
822 dst_release(*dstp);
823 *dstp = new;
824 return (new ? 0 : -ENOMEM);
825}
826EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
827
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828/*
829 * Destination cache support functions
830 */
831
832static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
833{
834 struct rt6_info *rt;
835
836 rt = (struct rt6_info *) dst;
837
838 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
839 return dst;
840
841 return NULL;
842}
843
844static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
845{
846 struct rt6_info *rt = (struct rt6_info *) dst;
847
848 if (rt) {
849 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700850 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 else
852 dst_release(dst);
853 }
854 return NULL;
855}
856
857static void ip6_link_failure(struct sk_buff *skb)
858{
859 struct rt6_info *rt;
860
861 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
862
863 rt = (struct rt6_info *) skb->dst;
864 if (rt) {
865 if (rt->rt6i_flags&RTF_CACHE) {
866 dst_set_expires(&rt->u.dst, 0);
867 rt->rt6i_flags |= RTF_EXPIRES;
868 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
869 rt->rt6i_node->fn_sernum = -1;
870 }
871}
872
873static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
874{
875 struct rt6_info *rt6 = (struct rt6_info*)dst;
876
877 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
878 rt6->rt6i_flags |= RTF_MODIFIED;
879 if (mtu < IPV6_MIN_MTU) {
880 mtu = IPV6_MIN_MTU;
881 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
882 }
883 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700884 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 }
886}
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888static int ipv6_get_mtu(struct net_device *dev);
889
890static inline unsigned int ipv6_advmss(unsigned int mtu)
891{
892 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
893
Daniel Lezcano49905092008-01-10 03:01:01 -0800894 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
895 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896
897 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900898 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
899 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
900 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 * rely only on pmtu discovery"
902 */
903 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
904 mtu = IPV6_MAXPLEN;
905 return mtu;
906}
907
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800908static struct dst_entry *icmp6_dst_gc_list;
909static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700910
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800911struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 struct neighbour *neigh,
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800913 struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914{
915 struct rt6_info *rt;
916 struct inet6_dev *idev = in6_dev_get(dev);
917
918 if (unlikely(idev == NULL))
919 return NULL;
920
921 rt = ip6_dst_alloc();
922 if (unlikely(rt == NULL)) {
923 in6_dev_put(idev);
924 goto out;
925 }
926
927 dev_hold(dev);
928 if (neigh)
929 neigh_hold(neigh);
930 else
931 neigh = ndisc_get_neigh(dev, addr);
932
933 rt->rt6i_dev = dev;
934 rt->rt6i_idev = idev;
935 rt->rt6i_nexthop = neigh;
936 atomic_set(&rt->u.dst.__refcnt, 1);
937 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
938 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
939 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800940 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
942#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900943 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
944 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 : 0;
946 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
947 rt->rt6i_dst.plen = 128;
948#endif
949
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800950 spin_lock_bh(&icmp6_dst_lock);
951 rt->u.dst.next = icmp6_dst_gc_list;
952 icmp6_dst_gc_list = &rt->u.dst;
953 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
955 fib6_force_start_gc();
956
957out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900958 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959}
960
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800961int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962{
963 struct dst_entry *dst, *next, **pprev;
964 int freed;
965
966 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900967 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700968
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800969 spin_lock_bh(&icmp6_dst_lock);
970 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 while ((dst = *pprev) != NULL) {
973 if (!atomic_read(&dst->__refcnt)) {
974 *pprev = dst->next;
975 dst_free(dst);
976 freed++;
977 } else {
978 pprev = &dst->next;
979 (*more)++;
980 }
981 }
982
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800983 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700984
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 return freed;
986}
987
Daniel Lezcano569d3642008-01-18 03:56:57 -0800988static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989{
990 static unsigned expire = 30*HZ;
991 static unsigned long last_gc;
992 unsigned long now = jiffies;
993
Daniel Lezcano49905092008-01-10 03:01:01 -0800994 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
995 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 goto out;
997
998 expire++;
Daniel Lezcano5b7c9312008-03-03 23:28:58 -0800999 fib6_run_gc(expire, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 last_gc = now;
1001 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
Daniel Lezcano49905092008-01-10 03:01:01 -08001002 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
1004out:
Daniel Lezcano49905092008-01-10 03:01:01 -08001005 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1006 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007}
1008
1009/* Clean host part of a prefix. Not necessary in radix tree,
1010 but results in cleaner routing tables.
1011
1012 Remove it only when all the things will work!
1013 */
1014
1015static int ipv6_get_mtu(struct net_device *dev)
1016{
1017 int mtu = IPV6_MIN_MTU;
1018 struct inet6_dev *idev;
1019
1020 idev = in6_dev_get(dev);
1021 if (idev) {
1022 mtu = idev->cnf.mtu6;
1023 in6_dev_put(idev);
1024 }
1025 return mtu;
1026}
1027
1028int ipv6_get_hoplimit(struct net_device *dev)
1029{
1030 int hoplimit = ipv6_devconf.hop_limit;
1031 struct inet6_dev *idev;
1032
1033 idev = in6_dev_get(dev);
1034 if (idev) {
1035 hoplimit = idev->cnf.hop_limit;
1036 in6_dev_put(idev);
1037 }
1038 return hoplimit;
1039}
1040
1041/*
1042 *
1043 */
1044
Thomas Graf86872cb2006-08-22 00:01:08 -07001045int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046{
1047 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 struct rt6_info *rt = NULL;
1049 struct net_device *dev = NULL;
1050 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001051 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 int addr_type;
1053
Thomas Graf86872cb2006-08-22 00:01:08 -07001054 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 return -EINVAL;
1056#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001057 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 return -EINVAL;
1059#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001060 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 err = -ENODEV;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001062 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 if (!dev)
1064 goto out;
1065 idev = in6_dev_get(dev);
1066 if (!idev)
1067 goto out;
1068 }
1069
Thomas Graf86872cb2006-08-22 00:01:08 -07001070 if (cfg->fc_metric == 0)
1071 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001073 table = fib6_new_table(&init_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001074 if (table == NULL) {
1075 err = -ENOBUFS;
1076 goto out;
1077 }
1078
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 rt = ip6_dst_alloc();
1080
1081 if (rt == NULL) {
1082 err = -ENOMEM;
1083 goto out;
1084 }
1085
1086 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001087 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
Thomas Graf86872cb2006-08-22 00:01:08 -07001089 if (cfg->fc_protocol == RTPROT_UNSPEC)
1090 cfg->fc_protocol = RTPROT_BOOT;
1091 rt->rt6i_protocol = cfg->fc_protocol;
1092
1093 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
1095 if (addr_type & IPV6_ADDR_MULTICAST)
1096 rt->u.dst.input = ip6_mc_input;
1097 else
1098 rt->u.dst.input = ip6_forward;
1099
1100 rt->u.dst.output = ip6_output;
1101
Thomas Graf86872cb2006-08-22 00:01:08 -07001102 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1103 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 if (rt->rt6i_dst.plen == 128)
1105 rt->u.dst.flags = DST_HOST;
1106
1107#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001108 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1109 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110#endif
1111
Thomas Graf86872cb2006-08-22 00:01:08 -07001112 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
1114 /* We cannot add true routes via loopback here,
1115 they would result in kernel looping; promote them to reject routes
1116 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001117 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1119 /* hold loopback dev/idev if we haven't done so. */
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001120 if (dev != init_net.loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 if (dev) {
1122 dev_put(dev);
1123 in6_dev_put(idev);
1124 }
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001125 dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 dev_hold(dev);
1127 idev = in6_dev_get(dev);
1128 if (!idev) {
1129 err = -ENODEV;
1130 goto out;
1131 }
1132 }
1133 rt->u.dst.output = ip6_pkt_discard_out;
1134 rt->u.dst.input = ip6_pkt_discard;
1135 rt->u.dst.error = -ENETUNREACH;
1136 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1137 goto install_route;
1138 }
1139
Thomas Graf86872cb2006-08-22 00:01:08 -07001140 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 struct in6_addr *gw_addr;
1142 int gwa_type;
1143
Thomas Graf86872cb2006-08-22 00:01:08 -07001144 gw_addr = &cfg->fc_gateway;
1145 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 gwa_type = ipv6_addr_type(gw_addr);
1147
1148 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1149 struct rt6_info *grt;
1150
1151 /* IPv6 strictly inhibits using not link-local
1152 addresses as nexthop address.
1153 Otherwise, router will not able to send redirects.
1154 It is very good, but in some (rare!) circumstances
1155 (SIT, PtP, NBMA NOARP links) it is handy to allow
1156 some exceptions. --ANK
1157 */
1158 err = -EINVAL;
1159 if (!(gwa_type&IPV6_ADDR_UNICAST))
1160 goto out;
1161
Thomas Graf86872cb2006-08-22 00:01:08 -07001162 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163
1164 err = -EHOSTUNREACH;
1165 if (grt == NULL)
1166 goto out;
1167 if (dev) {
1168 if (dev != grt->rt6i_dev) {
1169 dst_release(&grt->u.dst);
1170 goto out;
1171 }
1172 } else {
1173 dev = grt->rt6i_dev;
1174 idev = grt->rt6i_idev;
1175 dev_hold(dev);
1176 in6_dev_hold(grt->rt6i_idev);
1177 }
1178 if (!(grt->rt6i_flags&RTF_GATEWAY))
1179 err = 0;
1180 dst_release(&grt->u.dst);
1181
1182 if (err)
1183 goto out;
1184 }
1185 err = -EINVAL;
1186 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1187 goto out;
1188 }
1189
1190 err = -ENODEV;
1191 if (dev == NULL)
1192 goto out;
1193
Thomas Graf86872cb2006-08-22 00:01:08 -07001194 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1196 if (IS_ERR(rt->rt6i_nexthop)) {
1197 err = PTR_ERR(rt->rt6i_nexthop);
1198 rt->rt6i_nexthop = NULL;
1199 goto out;
1200 }
1201 }
1202
Thomas Graf86872cb2006-08-22 00:01:08 -07001203 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204
1205install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001206 if (cfg->fc_mx) {
1207 struct nlattr *nla;
1208 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
Thomas Graf86872cb2006-08-22 00:01:08 -07001210 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001211 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001212
1213 if (type) {
1214 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 err = -EINVAL;
1216 goto out;
1217 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001218
1219 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 }
1222 }
1223
1224 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1225 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1226 if (!rt->u.dst.metrics[RTAX_MTU-1])
1227 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1228 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1229 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1230 rt->u.dst.dev = dev;
1231 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001232 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001233 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
1235out:
1236 if (dev)
1237 dev_put(dev);
1238 if (idev)
1239 in6_dev_put(idev);
1240 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001241 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 return err;
1243}
1244
Thomas Graf86872cb2006-08-22 00:01:08 -07001245static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246{
1247 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001248 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
Patrick McHardy6c813a72006-08-06 22:22:47 -07001250 if (rt == &ip6_null_entry)
1251 return -ENOENT;
1252
Thomas Grafc71099a2006-08-04 23:20:06 -07001253 table = rt->rt6i_table;
1254 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
Thomas Graf86872cb2006-08-22 00:01:08 -07001256 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 dst_release(&rt->u.dst);
1258
Thomas Grafc71099a2006-08-04 23:20:06 -07001259 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260
1261 return err;
1262}
1263
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001264int ip6_del_rt(struct rt6_info *rt)
1265{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001266 struct nl_info info = {
1267 .nl_net = &init_net,
1268 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001269 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001270}
1271
Thomas Graf86872cb2006-08-22 00:01:08 -07001272static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273{
Thomas Grafc71099a2006-08-04 23:20:06 -07001274 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 struct fib6_node *fn;
1276 struct rt6_info *rt;
1277 int err = -ESRCH;
1278
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001279 table = fib6_get_table(&init_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001280 if (table == NULL)
1281 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
Thomas Grafc71099a2006-08-04 23:20:06 -07001283 read_lock_bh(&table->tb6_lock);
1284
1285 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001286 &cfg->fc_dst, cfg->fc_dst_len,
1287 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001288
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001290 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001291 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001293 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001295 if (cfg->fc_flags & RTF_GATEWAY &&
1296 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001298 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 continue;
1300 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001301 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302
Thomas Graf86872cb2006-08-22 00:01:08 -07001303 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 }
1305 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001306 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307
1308 return err;
1309}
1310
1311/*
1312 * Handle redirects
1313 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001314struct ip6rd_flowi {
1315 struct flowi fl;
1316 struct in6_addr gateway;
1317};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001319static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1320 struct flowi *fl,
1321 int flags)
1322{
1323 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1324 struct rt6_info *rt;
1325 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001326
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001328 * Get the "current" route for this destination and
1329 * check if the redirect has come from approriate router.
1330 *
1331 * RFC 2461 specifies that redirects should only be
1332 * accepted if they come from the nexthop to the target.
1333 * Due to the way the routes are chosen, this notion
1334 * is a bit fuzzy and one might need to check all possible
1335 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
Thomas Grafc71099a2006-08-04 23:20:06 -07001338 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001339 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001340restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001341 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001342 /*
1343 * Current route is on-link; redirect is always invalid.
1344 *
1345 * Seems, previous statement is not true. It could
1346 * be node, which looks for us as on-link (f.e. proxy ndisc)
1347 * But then router serving it might decide, that we should
1348 * know truth 8)8) --ANK (980726).
1349 */
1350 if (rt6_check_expired(rt))
1351 continue;
1352 if (!(rt->rt6i_flags & RTF_GATEWAY))
1353 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001354 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001355 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001356 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001357 continue;
1358 break;
1359 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001360
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001361 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001362 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001363 BACKTRACK(&fl->fl6_src);
1364out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001365 dst_hold(&rt->u.dst);
1366
1367 read_unlock_bh(&table->tb6_lock);
1368
1369 return rt;
1370};
1371
1372static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1373 struct in6_addr *src,
1374 struct in6_addr *gateway,
1375 struct net_device *dev)
1376{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001377 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001378 struct ip6rd_flowi rdfl = {
1379 .fl = {
1380 .oif = dev->ifindex,
1381 .nl_u = {
1382 .ip6_u = {
1383 .daddr = *dest,
1384 .saddr = *src,
1385 },
1386 },
1387 },
1388 .gateway = *gateway,
1389 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001390
1391 if (rt6_need_strict(dest))
1392 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001393
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001394 return (struct rt6_info *)fib6_rule_lookup(&init_net,
1395 (struct flowi *)&rdfl,
1396 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001397}
1398
1399void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1400 struct in6_addr *saddr,
1401 struct neighbour *neigh, u8 *lladdr, int on_link)
1402{
1403 struct rt6_info *rt, *nrt = NULL;
1404 struct netevent_redirect netevent;
1405
1406 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1407
1408 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 if (net_ratelimit())
1410 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1411 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001412 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 }
1414
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 /*
1416 * We have finally decided to accept it.
1417 */
1418
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001419 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1421 NEIGH_UPDATE_F_OVERRIDE|
1422 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1423 NEIGH_UPDATE_F_ISROUTER))
1424 );
1425
1426 /*
1427 * Redirect received -> path was valid.
1428 * Look, redirects are sent only in response to data packets,
1429 * so that this nexthop apparently is reachable. --ANK
1430 */
1431 dst_confirm(&rt->u.dst);
1432
1433 /* Duplicate redirect: silently ignore. */
1434 if (neigh == rt->u.dst.neighbour)
1435 goto out;
1436
1437 nrt = ip6_rt_copy(rt);
1438 if (nrt == NULL)
1439 goto out;
1440
1441 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1442 if (on_link)
1443 nrt->rt6i_flags &= ~RTF_GATEWAY;
1444
1445 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1446 nrt->rt6i_dst.plen = 128;
1447 nrt->u.dst.flags |= DST_HOST;
1448
1449 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1450 nrt->rt6i_nexthop = neigh_clone(neigh);
1451 /* Reset pmtu, it may be better */
1452 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1453 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1454
Thomas Graf40e22e82006-08-22 00:00:45 -07001455 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 goto out;
1457
Tom Tucker8d717402006-07-30 20:43:36 -07001458 netevent.old = &rt->u.dst;
1459 netevent.new = &nrt->u.dst;
1460 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1461
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001463 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 return;
1465 }
1466
1467out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001468 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 return;
1470}
1471
1472/*
1473 * Handle ICMP "packet too big" messages
1474 * i.e. Path MTU discovery
1475 */
1476
1477void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1478 struct net_device *dev, u32 pmtu)
1479{
1480 struct rt6_info *rt, *nrt;
1481 int allfrag = 0;
1482
1483 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1484 if (rt == NULL)
1485 return;
1486
1487 if (pmtu >= dst_mtu(&rt->u.dst))
1488 goto out;
1489
1490 if (pmtu < IPV6_MIN_MTU) {
1491 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001492 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 * MTU (1280) and a fragment header should always be included
1494 * after a node receiving Too Big message reporting PMTU is
1495 * less than the IPv6 Minimum Link MTU.
1496 */
1497 pmtu = IPV6_MIN_MTU;
1498 allfrag = 1;
1499 }
1500
1501 /* New mtu received -> path was valid.
1502 They are sent only in response to data packets,
1503 so that this nexthop apparently is reachable. --ANK
1504 */
1505 dst_confirm(&rt->u.dst);
1506
1507 /* Host route. If it is static, it would be better
1508 not to override it, but add new one, so that
1509 when cache entry will expire old pmtu
1510 would return automatically.
1511 */
1512 if (rt->rt6i_flags & RTF_CACHE) {
1513 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1514 if (allfrag)
1515 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano49905092008-01-10 03:01:01 -08001516 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1518 goto out;
1519 }
1520
1521 /* Network route.
1522 Two cases are possible:
1523 1. It is connected route. Action: COW
1524 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1525 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001526 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001527 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001528 else
1529 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001530
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001531 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001532 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1533 if (allfrag)
1534 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1535
1536 /* According to RFC 1981, detecting PMTU increase shouldn't be
1537 * happened within 5 mins, the recommended timer is 10 mins.
1538 * Here this route expiration time is set to ip6_rt_mtu_expires
1539 * which is 10 mins. After 10 mins the decreased pmtu is expired
1540 * and detecting PMTU increase will be automatically happened.
1541 */
Daniel Lezcano49905092008-01-10 03:01:01 -08001542 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001543 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1544
Thomas Graf40e22e82006-08-22 00:00:45 -07001545 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547out:
1548 dst_release(&rt->u.dst);
1549}
1550
1551/*
1552 * Misc support functions
1553 */
1554
1555static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1556{
1557 struct rt6_info *rt = ip6_dst_alloc();
1558
1559 if (rt) {
1560 rt->u.dst.input = ort->u.dst.input;
1561 rt->u.dst.output = ort->u.dst.output;
1562
1563 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001564 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 rt->u.dst.dev = ort->u.dst.dev;
1566 if (rt->u.dst.dev)
1567 dev_hold(rt->u.dst.dev);
1568 rt->rt6i_idev = ort->rt6i_idev;
1569 if (rt->rt6i_idev)
1570 in6_dev_hold(rt->rt6i_idev);
1571 rt->u.dst.lastuse = jiffies;
1572 rt->rt6i_expires = 0;
1573
1574 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1575 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1576 rt->rt6i_metric = 0;
1577
1578 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1579#ifdef CONFIG_IPV6_SUBTREES
1580 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1581#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001582 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 }
1584 return rt;
1585}
1586
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001587#ifdef CONFIG_IPV6_ROUTE_INFO
1588static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1589 struct in6_addr *gwaddr, int ifindex)
1590{
1591 struct fib6_node *fn;
1592 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001593 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001594
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001595 table = fib6_get_table(&init_net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001596 if (table == NULL)
1597 return NULL;
1598
1599 write_lock_bh(&table->tb6_lock);
1600 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001601 if (!fn)
1602 goto out;
1603
Eric Dumazet7cc48262007-02-09 16:22:57 -08001604 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001605 if (rt->rt6i_dev->ifindex != ifindex)
1606 continue;
1607 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1608 continue;
1609 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1610 continue;
1611 dst_hold(&rt->u.dst);
1612 break;
1613 }
1614out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001615 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001616 return rt;
1617}
1618
1619static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1620 struct in6_addr *gwaddr, int ifindex,
1621 unsigned pref)
1622{
Thomas Graf86872cb2006-08-22 00:01:08 -07001623 struct fib6_config cfg = {
1624 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001625 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001626 .fc_ifindex = ifindex,
1627 .fc_dst_len = prefixlen,
1628 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1629 RTF_UP | RTF_PREF(pref),
1630 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001631
Thomas Graf86872cb2006-08-22 00:01:08 -07001632 ipv6_addr_copy(&cfg.fc_dst, prefix);
1633 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1634
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001635 /* We should treat it as a default route if prefix length is 0. */
1636 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001637 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001638
Thomas Graf86872cb2006-08-22 00:01:08 -07001639 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001640
1641 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1642}
1643#endif
1644
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001646{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001648 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001650 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001651 if (table == NULL)
1652 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653
Thomas Grafc71099a2006-08-04 23:20:06 -07001654 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001655 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001657 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1659 break;
1660 }
1661 if (rt)
1662 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001663 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 return rt;
1665}
1666
Fred L. Templinc7dc89c2007-11-29 22:11:40 +11001667EXPORT_SYMBOL(rt6_get_dflt_router);
1668
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001670 struct net_device *dev,
1671 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672{
Thomas Graf86872cb2006-08-22 00:01:08 -07001673 struct fib6_config cfg = {
1674 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001675 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001676 .fc_ifindex = dev->ifindex,
1677 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1678 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1679 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680
Thomas Graf86872cb2006-08-22 00:01:08 -07001681 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
Thomas Graf86872cb2006-08-22 00:01:08 -07001683 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 return rt6_get_dflt_router(gwaddr, dev);
1686}
1687
1688void rt6_purge_dflt_routers(void)
1689{
1690 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001691 struct fib6_table *table;
1692
1693 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001694 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001695 if (table == NULL)
1696 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697
1698restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001699 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001700 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1702 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001703 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001704 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 goto restart;
1706 }
1707 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001708 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709}
1710
Thomas Graf86872cb2006-08-22 00:01:08 -07001711static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1712 struct fib6_config *cfg)
1713{
1714 memset(cfg, 0, sizeof(*cfg));
1715
1716 cfg->fc_table = RT6_TABLE_MAIN;
1717 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1718 cfg->fc_metric = rtmsg->rtmsg_metric;
1719 cfg->fc_expires = rtmsg->rtmsg_info;
1720 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1721 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1722 cfg->fc_flags = rtmsg->rtmsg_flags;
1723
Benjamin Theryf1243c22008-02-26 18:10:03 -08001724 cfg->fc_nlinfo.nl_net = &init_net;
1725
Thomas Graf86872cb2006-08-22 00:01:08 -07001726 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1727 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1728 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1729}
1730
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1732{
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 struct in6_rtmsg rtmsg;
1735 int err;
1736
1737 switch(cmd) {
1738 case SIOCADDRT: /* Add a route */
1739 case SIOCDELRT: /* Delete a route */
1740 if (!capable(CAP_NET_ADMIN))
1741 return -EPERM;
1742 err = copy_from_user(&rtmsg, arg,
1743 sizeof(struct in6_rtmsg));
1744 if (err)
1745 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001746
1747 rtmsg_to_fib6_config(&rtmsg, &cfg);
1748
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 rtnl_lock();
1750 switch (cmd) {
1751 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001752 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 break;
1754 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001755 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 break;
1757 default:
1758 err = -EINVAL;
1759 }
1760 rtnl_unlock();
1761
1762 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001763 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764
1765 return -EINVAL;
1766}
1767
1768/*
1769 * Drop the packet on the floor
1770 */
1771
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001772static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001774 int type;
1775 switch (ipstats_mib_noroutes) {
1776 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001777 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001778 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1779 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1780 break;
1781 }
1782 /* FALLTHROUGH */
1783 case IPSTATS_MIB_OUTNOROUTES:
1784 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1785 break;
1786 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001787 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 kfree_skb(skb);
1789 return 0;
1790}
1791
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001792static int ip6_pkt_discard(struct sk_buff *skb)
1793{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001794 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001795}
1796
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001797static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798{
1799 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001800 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801}
1802
David S. Miller6723ab52006-10-18 21:20:57 -07001803#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1804
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001805static int ip6_pkt_prohibit(struct sk_buff *skb)
1806{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001807 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001808}
1809
1810static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1811{
1812 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001813 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001814}
1815
David S. Miller6723ab52006-10-18 21:20:57 -07001816#endif
1817
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818/*
1819 * Allocate a dst for local (unicast / anycast) address.
1820 */
1821
1822struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1823 const struct in6_addr *addr,
1824 int anycast)
1825{
1826 struct rt6_info *rt = ip6_dst_alloc();
1827
1828 if (rt == NULL)
1829 return ERR_PTR(-ENOMEM);
1830
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001831 dev_hold(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 in6_dev_hold(idev);
1833
1834 rt->u.dst.flags = DST_HOST;
1835 rt->u.dst.input = ip6_input;
1836 rt->u.dst.output = ip6_output;
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001837 rt->rt6i_dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 rt->rt6i_idev = idev;
1839 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1840 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1841 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1842 rt->u.dst.obsolete = -1;
1843
1844 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001845 if (anycast)
1846 rt->rt6i_flags |= RTF_ANYCAST;
1847 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 rt->rt6i_flags |= RTF_LOCAL;
1849 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1850 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001851 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 return ERR_PTR(-ENOMEM);
1853 }
1854
1855 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1856 rt->rt6i_dst.plen = 128;
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001857 rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858
1859 atomic_set(&rt->u.dst.__refcnt, 1);
1860
1861 return rt;
1862}
1863
1864static int fib6_ifdown(struct rt6_info *rt, void *arg)
1865{
1866 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1867 rt != &ip6_null_entry) {
1868 RT6_TRACE("deleted by ifdown %p\n", rt);
1869 return -1;
1870 }
1871 return 0;
1872}
1873
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001874void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001876 fib6_clean_all(net, fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877}
1878
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU of that device */
};
1884
1885static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1886{
1887 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1888 struct inet6_dev *idev;
1889
1890 /* In IPv6 pmtu discovery is not optional,
1891 so that RTAX_MTU lock cannot disable it.
1892 We still use this lock to block changes
1893 caused by addrconf/ndisc.
1894 */
1895
1896 idev = __in6_dev_get(arg->dev);
1897 if (idev == NULL)
1898 return 0;
1899
1900 /* For administrative MTU increase, there is no way to discover
1901 IPv6 PMTU increase, so PMTU increase should be updated here.
1902 Since RFC 1981 doesn't include administrative MTU increase
1903 update PMTU increase is a MUST. (i.e. jumbo frame)
1904 */
1905 /*
1906 If new MTU is less than route PMTU, this new MTU will be the
1907 lowest MTU in the path, update the route PMTU to reflect PMTU
1908 decreases; if new MTU is greater than route PMTU, and the
1909 old MTU is the lowest MTU in the path, update the route PMTU
1910 to reflect the increase. In this case if the other nodes' MTU
1911 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1912 PMTU discouvery.
1913 */
1914 if (rt->rt6i_dev == arg->dev &&
1915 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001916 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001917 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001918 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001920 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1921 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 return 0;
1923}
1924
1925void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1926{
Thomas Grafc71099a2006-08-04 23:20:06 -07001927 struct rt6_mtu_change_arg arg = {
1928 .dev = dev,
1929 .mtu = mtu,
1930 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001932 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933}
1934
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001935static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001936 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001937 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001938 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001939 [RTA_PRIORITY] = { .type = NLA_U32 },
1940 [RTA_METRICS] = { .type = NLA_NESTED },
1941};
1942
1943static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1944 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
Thomas Graf86872cb2006-08-22 00:01:08 -07001946 struct rtmsg *rtm;
1947 struct nlattr *tb[RTA_MAX+1];
1948 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949
Thomas Graf86872cb2006-08-22 00:01:08 -07001950 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1951 if (err < 0)
1952 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Thomas Graf86872cb2006-08-22 00:01:08 -07001954 err = -EINVAL;
1955 rtm = nlmsg_data(nlh);
1956 memset(cfg, 0, sizeof(*cfg));
1957
1958 cfg->fc_table = rtm->rtm_table;
1959 cfg->fc_dst_len = rtm->rtm_dst_len;
1960 cfg->fc_src_len = rtm->rtm_src_len;
1961 cfg->fc_flags = RTF_UP;
1962 cfg->fc_protocol = rtm->rtm_protocol;
1963
1964 if (rtm->rtm_type == RTN_UNREACHABLE)
1965 cfg->fc_flags |= RTF_REJECT;
1966
1967 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1968 cfg->fc_nlinfo.nlh = nlh;
Benjamin Thery2216b482008-01-30 19:09:35 -08001969 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07001970
1971 if (tb[RTA_GATEWAY]) {
1972 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1973 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001975
1976 if (tb[RTA_DST]) {
1977 int plen = (rtm->rtm_dst_len + 7) >> 3;
1978
1979 if (nla_len(tb[RTA_DST]) < plen)
1980 goto errout;
1981
1982 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001984
1985 if (tb[RTA_SRC]) {
1986 int plen = (rtm->rtm_src_len + 7) >> 3;
1987
1988 if (nla_len(tb[RTA_SRC]) < plen)
1989 goto errout;
1990
1991 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001993
1994 if (tb[RTA_OIF])
1995 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1996
1997 if (tb[RTA_PRIORITY])
1998 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1999
2000 if (tb[RTA_METRICS]) {
2001 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2002 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002004
2005 if (tb[RTA_TABLE])
2006 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2007
2008 err = 0;
2009errout:
2010 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011}
2012
Thomas Grafc127ea22007-03-22 11:58:32 -07002013static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002015 struct net *net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002016 struct fib6_config cfg;
2017 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018
Denis V. Lunevb8542722007-12-01 00:21:31 +11002019 if (net != &init_net)
2020 return -EINVAL;
2021
Thomas Graf86872cb2006-08-22 00:01:08 -07002022 err = rtm_to_fib6_config(skb, nlh, &cfg);
2023 if (err < 0)
2024 return err;
2025
2026 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027}
2028
Thomas Grafc127ea22007-03-22 11:58:32 -07002029static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002031 struct net *net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002032 struct fib6_config cfg;
2033 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034
Denis V. Lunevb8542722007-12-01 00:21:31 +11002035 if (net != &init_net)
2036 return -EINVAL;
2037
Thomas Graf86872cb2006-08-22 00:01:08 -07002038 err = rtm_to_fib6_config(skb, nlh, &cfg);
2039 if (err < 0)
2040 return err;
2041
2042 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043}
2044
Thomas Graf339bf982006-11-10 14:10:15 -08002045static inline size_t rt6_nlmsg_size(void)
2046{
2047 return NLMSG_ALIGN(sizeof(struct rtmsg))
2048 + nla_total_size(16) /* RTA_SRC */
2049 + nla_total_size(16) /* RTA_DST */
2050 + nla_total_size(16) /* RTA_GATEWAY */
2051 + nla_total_size(16) /* RTA_PREFSRC */
2052 + nla_total_size(4) /* RTA_TABLE */
2053 + nla_total_size(4) /* RTA_IIF */
2054 + nla_total_size(4) /* RTA_OIF */
2055 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002056 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002057 + nla_total_size(sizeof(struct rta_cacheinfo));
2058}
2059
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002061 struct in6_addr *dst, struct in6_addr *src,
2062 int iif, int type, u32 pid, u32 seq,
2063 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064{
2065 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002066 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002067 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002068 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069
2070 if (prefix) { /* user wants prefix routes only */
2071 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2072 /* success since this is not a prefix route */
2073 return 1;
2074 }
2075 }
2076
Thomas Graf2d7202b2006-08-22 00:01:27 -07002077 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2078 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002079 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002080
2081 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 rtm->rtm_family = AF_INET6;
2083 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2084 rtm->rtm_src_len = rt->rt6i_src.plen;
2085 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002086 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002087 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002088 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002089 table = RT6_TABLE_UNSPEC;
2090 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002091 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092 if (rt->rt6i_flags&RTF_REJECT)
2093 rtm->rtm_type = RTN_UNREACHABLE;
2094 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2095 rtm->rtm_type = RTN_LOCAL;
2096 else
2097 rtm->rtm_type = RTN_UNICAST;
2098 rtm->rtm_flags = 0;
2099 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2100 rtm->rtm_protocol = rt->rt6i_protocol;
2101 if (rt->rt6i_flags&RTF_DYNAMIC)
2102 rtm->rtm_protocol = RTPROT_REDIRECT;
2103 else if (rt->rt6i_flags & RTF_ADDRCONF)
2104 rtm->rtm_protocol = RTPROT_KERNEL;
2105 else if (rt->rt6i_flags&RTF_DEFAULT)
2106 rtm->rtm_protocol = RTPROT_RA;
2107
2108 if (rt->rt6i_flags&RTF_CACHE)
2109 rtm->rtm_flags |= RTM_F_CLONED;
2110
2111 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002112 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002113 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002114 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002115 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116#ifdef CONFIG_IPV6_SUBTREES
2117 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002118 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002119 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002121 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122#endif
2123 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002124 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125 else if (dst) {
2126 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002127 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2128 dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002129 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002131
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002133 goto nla_put_failure;
2134
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002136 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2137
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002139 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2140
2141 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002142
2143 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2144 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2145 expires, rt->u.dst.error) < 0)
2146 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147
Thomas Graf2d7202b2006-08-22 00:01:27 -07002148 return nlmsg_end(skb, nlh);
2149
2150nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002151 nlmsg_cancel(skb, nlh);
2152 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153}
2154
Patrick McHardy1b43af52006-08-10 23:11:17 -07002155int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156{
2157 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2158 int prefix;
2159
Thomas Graf2d7202b2006-08-22 00:01:27 -07002160 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2161 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2163 } else
2164 prefix = 0;
2165
2166 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2167 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002168 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169}
2170
Thomas Grafc127ea22007-03-22 11:58:32 -07002171static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002173 struct net *net = in_skb->sk->sk_net;
Thomas Grafab364a62006-08-22 00:01:47 -07002174 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002176 struct sk_buff *skb;
2177 struct rtmsg *rtm;
2178 struct flowi fl;
2179 int err, iif = 0;
2180
Denis V. Lunevb8542722007-12-01 00:21:31 +11002181 if (net != &init_net)
2182 return -EINVAL;
2183
Thomas Grafab364a62006-08-22 00:01:47 -07002184 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2185 if (err < 0)
2186 goto errout;
2187
2188 err = -EINVAL;
2189 memset(&fl, 0, sizeof(fl));
2190
2191 if (tb[RTA_SRC]) {
2192 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2193 goto errout;
2194
2195 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2196 }
2197
2198 if (tb[RTA_DST]) {
2199 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2200 goto errout;
2201
2202 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2203 }
2204
2205 if (tb[RTA_IIF])
2206 iif = nla_get_u32(tb[RTA_IIF]);
2207
2208 if (tb[RTA_OIF])
2209 fl.oif = nla_get_u32(tb[RTA_OIF]);
2210
2211 if (iif) {
2212 struct net_device *dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002213 dev = __dev_get_by_index(&init_net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002214 if (!dev) {
2215 err = -ENODEV;
2216 goto errout;
2217 }
2218 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219
2220 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002221 if (skb == NULL) {
2222 err = -ENOBUFS;
2223 goto errout;
2224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225
2226 /* Reserve room for dummy headers, this skb can pass
2227 through good chunk of routing engine.
2228 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002229 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2231
Thomas Grafab364a62006-08-22 00:01:47 -07002232 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 skb->dst = &rt->u.dst;
2234
Thomas Grafab364a62006-08-22 00:01:47 -07002235 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002237 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002239 kfree_skb(skb);
2240 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 }
2242
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08002243 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002244errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246}
2247
Thomas Graf86872cb2006-08-22 00:01:08 -07002248void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249{
2250 struct sk_buff *skb;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002251 u32 seq;
2252 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002254 err = -ENOBUFS;
2255 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002256
Thomas Graf339bf982006-11-10 14:10:15 -08002257 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002258 if (skb == NULL)
2259 goto errout;
2260
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002261 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2262 event, info->pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002263 if (err < 0) {
2264 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2265 WARN_ON(err == -EMSGSIZE);
2266 kfree_skb(skb);
2267 goto errout;
2268 }
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002269 err = rtnl_notify(skb, &init_net, info->pid,
2270 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002271errout:
2272 if (err < 0)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08002273 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274}
2275
2276/*
2277 * /proc
2278 */
2279
2280#ifdef CONFIG_PROC_FS
2281
/*
 * Sum of the field widths of one text line describing a route.
 * NOTE(review): neither this constant nor struct rt6_proc_arg is
 * referenced in the part of the file visible here -- check for remaining
 * users before relying on them.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2292
2293static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2294{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002295 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002297 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2298 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299
2300#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002301 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2302 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002304 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305#endif
2306
2307 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002308 seq_printf(m, NIP6_SEQFMT,
2309 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002311 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002313 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2314 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2315 rt->u.dst.__use, rt->rt6i_flags,
2316 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 return 0;
2318}
2319
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002320static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002322 struct net *net = (struct net *)m->private;
2323 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002324 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325}
2326
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002327static int ipv6_route_open(struct inode *inode, struct file *file)
2328{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002329 struct net *net = get_proc_net(inode);
2330 if (!net)
2331 return -ENXIO;
2332 return single_open(file, ipv6_route_show, net);
2333}
2334
2335static int ipv6_route_release(struct inode *inode, struct file *file)
2336{
2337 struct seq_file *seq = file->private_data;
2338 struct net *net = seq->private;
2339 put_net(net);
2340 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002341}
2342
2343static const struct file_operations ipv6_route_proc_fops = {
2344 .owner = THIS_MODULE,
2345 .open = ipv6_route_open,
2346 .read = seq_read,
2347 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002348 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002349};
2350
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2352{
2353 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2354 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2355 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2356 rt6_stats.fib_rt_cache,
2357 atomic_read(&ip6_dst_ops.entries),
2358 rt6_stats.fib_discarded_routes);
2359
2360 return 0;
2361}
2362
2363static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2364{
2365 return single_open(file, rt6_stats_seq_show, NULL);
2366}
2367
Arjan van de Ven9a321442007-02-12 00:55:35 -08002368static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 .owner = THIS_MODULE,
2370 .open = rt6_stats_seq_open,
2371 .read = seq_read,
2372 .llseek = seq_lseek,
2373 .release = single_release,
2374};
Daniel Lezcano75314fb2007-12-08 00:13:32 -08002375
2376static int ipv6_route_proc_init(struct net *net)
2377{
2378 int ret = -ENOMEM;
2379 if (!proc_net_fops_create(net, "ipv6_route",
2380 0, &ipv6_route_proc_fops))
2381 goto out;
2382
2383 if (!proc_net_fops_create(net, "rt6_stats",
2384 S_IRUGO, &rt6_stats_seq_fops))
2385 goto out_ipv6_route;
2386
2387 ret = 0;
2388out:
2389 return ret;
2390out_ipv6_route:
2391 proc_net_remove(net, "ipv6_route");
2392 goto out;
2393}
2394
/* Remove the two proc entries created by ipv6_route_proc_init() for @net. */
static void ipv6_route_proc_fini(struct net *net)
{
	static const char * const entries[] = { "ipv6_route", "rt6_stats" };
	unsigned int i;

	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		proc_net_remove(net, entries[i]);
}
2400#else
/* Without CONFIG_PROC_FS there is nothing to create; report success. */
static inline int ipv6_route_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ipv6_route_proc_fini(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409#endif /* CONFIG_PROC_FS */
2410
2411#ifdef CONFIG_SYSCTL
2412
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413static
2414int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2415 void __user *buffer, size_t *lenp, loff_t *ppos)
2416{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002417 struct net *net = current->nsproxy->net_ns;
2418 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 if (write) {
2420 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002421 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422 return 0;
2423 } else
2424 return -EINVAL;
2425}
2426
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002427ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002428 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002430 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002432 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002433 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 },
2435 {
2436 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2437 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002438 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439 .maxlen = sizeof(int),
2440 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002441 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 },
2443 {
2444 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2445 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002446 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 .maxlen = sizeof(int),
2448 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002449 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 },
2451 {
2452 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2453 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 .maxlen = sizeof(int),
2456 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002457 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 .strategy = &sysctl_jiffies,
2459 },
2460 {
2461 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2462 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 .maxlen = sizeof(int),
2465 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002466 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 .strategy = &sysctl_jiffies,
2468 },
2469 {
2470 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2471 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002472 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 .maxlen = sizeof(int),
2474 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002475 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 .strategy = &sysctl_jiffies,
2477 },
2478 {
2479 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2480 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002481 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 .maxlen = sizeof(int),
2483 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002484 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485 .strategy = &sysctl_jiffies,
2486 },
2487 {
2488 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2489 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002490 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 .maxlen = sizeof(int),
2492 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002493 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 .strategy = &sysctl_jiffies,
2495 },
2496 {
2497 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2498 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002499 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 .maxlen = sizeof(int),
2501 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002502 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 .strategy = &sysctl_jiffies,
2504 },
2505 {
2506 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2507 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002508 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509 .maxlen = sizeof(int),
2510 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002511 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512 .strategy = &sysctl_ms_jiffies,
2513 },
2514 { .ctl_name = 0 }
2515};
2516
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002517struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2518{
2519 struct ctl_table *table;
2520
2521 table = kmemdup(ipv6_route_table_template,
2522 sizeof(ipv6_route_table_template),
2523 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002524
2525 if (table) {
2526 table[0].data = &net->ipv6.sysctl.flush_delay;
2527 /* table[1].data will be handled when we have
2528 routes per namespace */
2529 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2530 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2531 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2532 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2533 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2534 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2535 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2536 }
2537
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002538 return table;
2539}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540#endif
2541
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002542int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002544 int ret;
2545
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002546 ip6_dst_ops.kmem_cachep =
2547 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Daniel Lezcanof845ab62007-12-07 00:45:16 -08002548 SLAB_HWCACHE_ALIGN, NULL);
2549 if (!ip6_dst_ops.kmem_cachep)
2550 return -ENOMEM;
2551
David S. Miller14e50e52007-05-24 18:17:54 -07002552 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2553
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002554 ret = fib6_init();
2555 if (ret)
2556 goto out_kmem_cache;
2557
Daniel Lezcano75314fb2007-12-08 00:13:32 -08002558 ret = ipv6_route_proc_init(&init_net);
2559 if (ret)
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002560 goto out_fib6_init;
2561
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002562 ret = xfrm6_init();
2563 if (ret)
Daniel Lezcano75314fb2007-12-08 00:13:32 -08002564 goto out_proc_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002565
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002566 ret = fib6_rules_init();
2567 if (ret)
2568 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002569
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002570 ret = -ENOBUFS;
2571 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2572 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2573 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2574 goto fib6_rules_init;
2575
2576 ret = 0;
2577out:
2578 return ret;
2579
2580fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002581 fib6_rules_cleanup();
2582xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002583 xfrm6_fini();
Daniel Lezcano75314fb2007-12-08 00:13:32 -08002584out_proc_init:
2585 ipv6_route_proc_fini(&init_net);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002586out_fib6_init:
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002587 rt6_ifdown(&init_net, NULL);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002588 fib6_gc_cleanup();
2589out_kmem_cache:
2590 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2591 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592}
2593
2594void ip6_route_cleanup(void)
2595{
Thomas Graf101367c2006-08-04 03:39:02 -07002596 fib6_rules_cleanup();
Daniel Lezcano75314fb2007-12-08 00:13:32 -08002597 ipv6_route_proc_fini(&init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 xfrm6_fini();
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002599 rt6_ifdown(&init_net, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 fib6_gc_cleanup();
2601 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2602}