blob: 4278cec522c5b842d44a83170b38fef29460365b [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: from that level
 * on RDBG() and RT6_TRACE() expand to printk() calls, below it they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch read by ip6_pol_route(): when non-zero, routes that
 * do not take the copy-on-write path are cloned with rt6_alloc_clone();
 * when zero they are returned as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu862b82c2007-11-13 21:43:11 -0800111 .local_out = ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800113 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
David S. Miller14e50e52007-05-24 18:17:54 -0700116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800127 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700128};
129
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130struct rt6_info ip6_null_entry = {
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
141 .path = (struct dst_entry*)&ip6_null_entry,
142 }
143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_metric = ~(u32) 0,
146 .rt6i_ref = ATOMIC_INIT(1),
147};
148
Thomas Graf101367c2006-08-04 03:39:02 -0700149#ifdef CONFIG_IPV6_MULTIPLE_TABLES
150
David S. Miller6723ab52006-10-18 21:20:57 -0700151static int ip6_pkt_prohibit(struct sk_buff *skb);
152static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700153
Thomas Graf101367c2006-08-04 03:39:02 -0700154struct rt6_info ip6_prohibit_entry = {
155 .u = {
156 .dst = {
157 .__refcnt = ATOMIC_INIT(1),
158 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700159 .obsolete = -1,
160 .error = -EACCES,
161 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700162 .input = ip6_pkt_prohibit,
163 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700164 .ops = &ip6_dst_ops,
165 .path = (struct dst_entry*)&ip6_prohibit_entry,
166 }
167 },
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
171};
172
173struct rt6_info ip6_blk_hole_entry = {
174 .u = {
175 .dst = {
176 .__refcnt = ATOMIC_INIT(1),
177 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700178 .obsolete = -1,
179 .error = -EINVAL,
180 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800181 .input = dst_discard,
182 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700183 .ops = &ip6_dst_ops,
184 .path = (struct dst_entry*)&ip6_blk_hole_entry,
185 }
186 },
187 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190};
191
192#endif
193
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194/* allocate dst with ip6_dst_ops */
195static __inline__ struct rt6_info *ip6_dst_alloc(void)
196{
197 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
198}
199
200static void ip6_dst_destroy(struct dst_entry *dst)
201{
202 struct rt6_info *rt = (struct rt6_info *)dst;
203 struct inet6_dev *idev = rt->rt6i_idev;
204
205 if (idev != NULL) {
206 rt->rt6i_idev = NULL;
207 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900208 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209}
210
211static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212 int how)
213{
214 struct rt6_info *rt = (struct rt6_info *)dst;
215 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800216 struct net_device *loopback_dev =
217 dev->nd_net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800219 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev =
221 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 if (loopback_idev != NULL) {
223 rt->rt6i_idev = loopback_idev;
224 in6_dev_put(idev);
225 }
226 }
227}
228
229static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230{
231 return (rt->rt6i_flags & RTF_EXPIRES &&
232 time_after(jiffies, rt->rt6i_expires));
233}
234
Thomas Grafc71099a2006-08-04 23:20:06 -0700235static inline int rt6_need_strict(struct in6_addr *daddr)
236{
237 return (ipv6_addr_type(daddr) &
238 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239}
240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700242 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 */
244
245static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
246 int oif,
247 int strict)
248{
249 struct rt6_info *local = NULL;
250 struct rt6_info *sprt;
251
252 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800253 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 struct net_device *dev = sprt->rt6i_dev;
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (strict && oif)
261 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900262 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
268 }
269
270 if (local)
271 return local;
272
273 if (strict)
274 return &ip6_null_entry;
275 }
276 return rt;
277}
278
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's nexthop neighbour is not in a NUD_VALID state and more
 * than the device's rtr_probe_interval has passed since neigh->updated,
 * stamp neigh->updated and send a neighbour solicitation to the
 * solicited-node multicast address of the neighbour.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the limit applied here is cnf.rtr_probe_interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	if (!rt)
		return;

	neigh = rt->rt6i_nexthop;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state and the rate limit under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	/* The solicitation is sent after the lock has been dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
314
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800316 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700318static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700321 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700323 if ((dev->flags & IFF_LOOPBACK) &&
324 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
325 return 1;
326 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327}
328
Dave Jonesb6f99a22007-03-22 12:27:49 -0700329static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800331 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800332 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700333 if (rt->rt6i_flags & RTF_NONEXTHOP ||
334 !(rt->rt6i_flags & RTF_GATEWAY))
335 m = 1;
336 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 read_lock_bh(&neigh->lock);
338 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700339 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800340#ifdef CONFIG_IPV6_ROUTER_PREF
341 else if (neigh->nud_state & NUD_FAILED)
342 m = 0;
343#endif
344 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800345 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800346 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800347 } else
348 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349 return m;
350}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352static int rt6_score_route(struct rt6_info *rt, int oif,
353 int strict)
354{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700355 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900356
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700357 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700358 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800360#ifdef CONFIG_IPV6_ROUTER_PREF
361 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700363 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800364 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 return -1;
366 return m;
367}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368
David S. Millerf11e6652007-03-24 20:36:25 -0700369static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800371{
David S. Millerf11e6652007-03-24 20:36:25 -0700372 int m;
373
374 if (rt6_check_expired(rt))
375 goto out;
376
377 m = rt6_score_route(rt, oif, strict);
378 if (m < 0)
379 goto out;
380
381 if (m > *mpri) {
382 if (strict & RT6_LOOKUP_F_REACHABLE)
383 rt6_probe(match);
384 *mpri = m;
385 match = rt;
386 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
387 rt6_probe(rt);
388 }
389
390out:
391 return match;
392}
393
394static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395 struct rt6_info *rr_head,
396 u32 metric, int oif, int strict)
397{
398 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800399 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400
David S. Millerf11e6652007-03-24 20:36:25 -0700401 match = NULL;
402 for (rt = rr_head; rt && rt->rt6i_metric == metric;
403 rt = rt->u.dst.rt6_next)
404 match = find_match(rt, oif, strict, &mpri, match);
405 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800408
David S. Millerf11e6652007-03-24 20:36:25 -0700409 return match;
410}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800411
David S. Millerf11e6652007-03-24 20:36:25 -0700412static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413{
414 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
David S. Millerf11e6652007-03-24 20:36:25 -0700419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800425 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->u.dst.rt6_next;
428
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800429 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436
David S. Millerf11e6652007-03-24 20:36:25 -0700437 RT6_TRACE("%s() => %p\n",
438 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800440 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441}
442
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800443#ifdef CONFIG_IPV6_ROUTE_INFO
444int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
446{
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800447 struct net *net = dev->nd_net;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
450 unsigned int pref;
451 u32 lifetime;
452 struct rt6_info *rt;
453
454 if (len < sizeof(struct route_info)) {
455 return -EINVAL;
456 }
457
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
460 return -EINVAL;
461 } else if (rinfo->prefix_len > 128) {
462 return -EINVAL;
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
465 return -EINVAL;
466 }
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
469 return -EINVAL;
470 }
471 }
472
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
476
Al Viroe69a4adc2006-11-14 20:56:00 -0800477 lifetime = ntohl(rinfo->lifetime);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800478 if (lifetime == 0xffffffff) {
479 /* infinity */
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
483 }
484
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
487 else {
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
491 rinfo->prefix_len);
492 prefix = &prefix_buf;
493 }
494
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
496 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800497
498 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700499 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800500 rt = NULL;
501 }
502
503 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800505 pref);
506 else if (rt)
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
509
510 if (rt) {
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
513 } else {
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
516 }
517 dst_release(&rt->u.dst);
518 }
519 return 0;
520}
521#endif
522
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed selection.
 *
 * Expects the variables `rt` and `fn` and the labels `out` and `restart`
 * to exist in the enclosing function.  Does nothing unless rt is
 * ip6_null_entry.  Otherwise it moves fn up one level at a time:
 *   - at a node flagged RTN_TL_ROOT it jumps to `out`;
 *   - if the parent has a subtree other than fn, fn becomes the result of
 *     looking @saddr up in that subtree, else fn becomes the parent;
 *   - once fn is flagged RTN_RTINFO it jumps to `restart`.
 */
#define BACKTRACK(saddr) \
do { \
	struct fib6_node *pn; \
	if (rt != &ip6_null_entry) \
		break; \
	for (;;) { \
		if (fn->fn_flags & RTN_TL_ROOT) \
			goto out; \
		pn = fn->parent; \
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
		else \
			fn = pn; \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700540
541static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
Thomas Grafc71099a2006-08-04 23:20:06 -0700547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700551 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700552 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700553out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800554 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556 return rt;
557
558}
559
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800560struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561 struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700562{
563 struct flowi fl = {
564 .oif = oif,
565 .nl_u = {
566 .ip6_u = {
567 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700568 },
569 },
570 };
571 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700572 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700573
Thomas Grafadaa70b2006-10-13 15:01:03 -0700574 if (saddr) {
575 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576 flags |= RT6_LOOKUP_F_HAS_SADDR;
577 }
578
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800579 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700580 if (dst->error == 0)
581 return (struct rt6_info *) dst;
582
583 dst_release(dst);
584
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 return NULL;
586}
587
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900588EXPORT_SYMBOL(rt6_lookup);
589
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
595
Thomas Graf86872cb2006-08-22 00:01:08 -0700596static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597{
598 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700599 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
Thomas Grafc71099a2006-08-04 23:20:06 -0700601 table = rt->rt6i_table;
602 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700603 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
606 return err;
607}
608
Thomas Graf40e22e82006-08-22 00:00:45 -0700609int ip6_ins_rt(struct rt6_info *rt)
610{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800611 struct nl_info info = {
612 .nl_net = &init_net,
613 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800614 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700615}
616
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800650 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800652 return rt;
653}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700668static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700669 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800672 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700673 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700678 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800683restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
686restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700687 rt = rt6_select(fn, oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700688 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800691 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800693 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800695
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800698 else {
699#if CLONE_OFFLINK_ROUTE
700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701#else
702 goto out2;
703#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800705
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
708
709 dst_hold(&rt->u.dst);
710 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700711 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800712 if (!err)
713 goto out2;
714 }
715
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700720 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800731 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700732 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700736
737 return rt;
738}
739
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700740static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
741 struct flowi *fl, int flags)
742{
743 return ip6_pol_route(table, fl->iif, fl, flags);
744}
745
Thomas Grafc71099a2006-08-04 23:20:06 -0700746void ip6_route_input(struct sk_buff *skb)
747{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700748 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700749 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700750 struct flowi fl = {
751 .iif = skb->dev->ifindex,
752 .nl_u = {
753 .ip6_u = {
754 .daddr = iph->daddr,
755 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800756 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700757 },
758 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900759 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700760 .proto = iph->nexthdr,
761 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700762
763 if (rt6_need_strict(&iph->daddr))
764 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700765
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800766 skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700767}
768
769static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
770 struct flowi *fl, int flags)
771{
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700772 return ip6_pol_route(table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700773}
774
775struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
776{
777 int flags = 0;
778
779 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700780 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700781
Thomas Grafadaa70b2006-10-13 15:01:03 -0700782 if (!ipv6_addr_any(&fl->fl6_src))
783 flags |= RT6_LOOKUP_F_HAS_SADDR;
784
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800785 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786}
787
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900788EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789
David S. Miller14e50e52007-05-24 18:17:54 -0700790int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
791{
792 struct rt6_info *ort = (struct rt6_info *) *dstp;
793 struct rt6_info *rt = (struct rt6_info *)
794 dst_alloc(&ip6_dst_blackhole_ops);
795 struct dst_entry *new = NULL;
796
797 if (rt) {
798 new = &rt->u.dst;
799
800 atomic_set(&new->__refcnt, 1);
801 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800802 new->input = dst_discard;
803 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700804
805 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
806 new->dev = ort->u.dst.dev;
807 if (new->dev)
808 dev_hold(new->dev);
809 rt->rt6i_idev = ort->rt6i_idev;
810 if (rt->rt6i_idev)
811 in6_dev_hold(rt->rt6i_idev);
812 rt->rt6i_expires = 0;
813
814 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
815 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
816 rt->rt6i_metric = 0;
817
818 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
819#ifdef CONFIG_IPV6_SUBTREES
820 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
821#endif
822
823 dst_free(new);
824 }
825
826 dst_release(*dstp);
827 *dstp = new;
828 return (new ? 0 : -ENOMEM);
829}
830EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
831
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832/*
833 * Destination cache support functions
834 */
835
836static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
837{
838 struct rt6_info *rt;
839
840 rt = (struct rt6_info *) dst;
841
842 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
843 return dst;
844
845 return NULL;
846}
847
848static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
849{
850 struct rt6_info *rt = (struct rt6_info *) dst;
851
852 if (rt) {
853 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700854 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 else
856 dst_release(dst);
857 }
858 return NULL;
859}
860
861static void ip6_link_failure(struct sk_buff *skb)
862{
863 struct rt6_info *rt;
864
865 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
866
867 rt = (struct rt6_info *) skb->dst;
868 if (rt) {
869 if (rt->rt6i_flags&RTF_CACHE) {
870 dst_set_expires(&rt->u.dst, 0);
871 rt->rt6i_flags |= RTF_EXPIRES;
872 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
873 rt->rt6i_node->fn_sernum = -1;
874 }
875}
876
877static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
878{
879 struct rt6_info *rt6 = (struct rt6_info*)dst;
880
881 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
882 rt6->rt6i_flags |= RTF_MODIFIED;
883 if (mtu < IPV6_MIN_MTU) {
884 mtu = IPV6_MIN_MTU;
885 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
886 }
887 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700888 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 }
890}
891
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892static int ipv6_get_mtu(struct net_device *dev);
893
894static inline unsigned int ipv6_advmss(unsigned int mtu)
895{
896 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
897
Daniel Lezcano49905092008-01-10 03:01:01 -0800898 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
899 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900
901 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900902 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
903 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
904 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 * rely only on pmtu discovery"
906 */
907 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
908 mtu = IPV6_MAXPLEN;
909 return mtu;
910}
911
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800912static struct dst_entry *icmp6_dst_gc_list;
913static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700914
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800915struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 struct neighbour *neigh,
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800917 struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918{
919 struct rt6_info *rt;
920 struct inet6_dev *idev = in6_dev_get(dev);
921
922 if (unlikely(idev == NULL))
923 return NULL;
924
925 rt = ip6_dst_alloc();
926 if (unlikely(rt == NULL)) {
927 in6_dev_put(idev);
928 goto out;
929 }
930
931 dev_hold(dev);
932 if (neigh)
933 neigh_hold(neigh);
934 else
935 neigh = ndisc_get_neigh(dev, addr);
936
937 rt->rt6i_dev = dev;
938 rt->rt6i_idev = idev;
939 rt->rt6i_nexthop = neigh;
940 atomic_set(&rt->u.dst.__refcnt, 1);
941 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
942 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
943 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800944 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945
946#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900947 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
948 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 : 0;
950 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
951 rt->rt6i_dst.plen = 128;
952#endif
953
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800954 spin_lock_bh(&icmp6_dst_lock);
955 rt->u.dst.next = icmp6_dst_gc_list;
956 icmp6_dst_gc_list = &rt->u.dst;
957 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958
Daniel Lezcano63152fc2008-03-03 23:31:11 -0800959 fib6_force_start_gc(dev->nd_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960
961out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900962 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963}
964
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800965int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966{
967 struct dst_entry *dst, *next, **pprev;
968 int freed;
969
970 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900971 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700972
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800973 spin_lock_bh(&icmp6_dst_lock);
974 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700975
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 while ((dst = *pprev) != NULL) {
977 if (!atomic_read(&dst->__refcnt)) {
978 *pprev = dst->next;
979 dst_free(dst);
980 freed++;
981 } else {
982 pprev = &dst->next;
983 (*more)++;
984 }
985 }
986
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800987 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700988
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 return freed;
990}
991
Daniel Lezcano569d3642008-01-18 03:56:57 -0800992static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993{
994 static unsigned expire = 30*HZ;
995 static unsigned long last_gc;
996 unsigned long now = jiffies;
997
Daniel Lezcano49905092008-01-10 03:01:01 -0800998 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
999 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 goto out;
1001
1002 expire++;
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08001003 fib6_run_gc(expire, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 last_gc = now;
1005 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
Daniel Lezcano49905092008-01-10 03:01:01 -08001006 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008out:
Daniel Lezcano49905092008-01-10 03:01:01 -08001009 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1010 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011}
1012
1013/* Clean host part of a prefix. Not necessary in radix tree,
1014 but results in cleaner routing tables.
1015
1016 Remove it only when all the things will work!
1017 */
1018
1019static int ipv6_get_mtu(struct net_device *dev)
1020{
1021 int mtu = IPV6_MIN_MTU;
1022 struct inet6_dev *idev;
1023
1024 idev = in6_dev_get(dev);
1025 if (idev) {
1026 mtu = idev->cnf.mtu6;
1027 in6_dev_put(idev);
1028 }
1029 return mtu;
1030}
1031
1032int ipv6_get_hoplimit(struct net_device *dev)
1033{
1034 int hoplimit = ipv6_devconf.hop_limit;
1035 struct inet6_dev *idev;
1036
1037 idev = in6_dev_get(dev);
1038 if (idev) {
1039 hoplimit = idev->cnf.hop_limit;
1040 in6_dev_put(idev);
1041 }
1042 return hoplimit;
1043}
1044
1045/*
1046 *
1047 */
1048
Thomas Graf86872cb2006-08-22 00:01:08 -07001049int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050{
1051 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 struct rt6_info *rt = NULL;
1053 struct net_device *dev = NULL;
1054 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001055 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 int addr_type;
1057
Thomas Graf86872cb2006-08-22 00:01:08 -07001058 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 return -EINVAL;
1060#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001061 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 return -EINVAL;
1063#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001064 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 err = -ENODEV;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001066 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 if (!dev)
1068 goto out;
1069 idev = in6_dev_get(dev);
1070 if (!idev)
1071 goto out;
1072 }
1073
Thomas Graf86872cb2006-08-22 00:01:08 -07001074 if (cfg->fc_metric == 0)
1075 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001077 table = fib6_new_table(&init_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001078 if (table == NULL) {
1079 err = -ENOBUFS;
1080 goto out;
1081 }
1082
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 rt = ip6_dst_alloc();
1084
1085 if (rt == NULL) {
1086 err = -ENOMEM;
1087 goto out;
1088 }
1089
1090 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001091 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092
Thomas Graf86872cb2006-08-22 00:01:08 -07001093 if (cfg->fc_protocol == RTPROT_UNSPEC)
1094 cfg->fc_protocol = RTPROT_BOOT;
1095 rt->rt6i_protocol = cfg->fc_protocol;
1096
1097 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
1099 if (addr_type & IPV6_ADDR_MULTICAST)
1100 rt->u.dst.input = ip6_mc_input;
1101 else
1102 rt->u.dst.input = ip6_forward;
1103
1104 rt->u.dst.output = ip6_output;
1105
Thomas Graf86872cb2006-08-22 00:01:08 -07001106 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1107 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 if (rt->rt6i_dst.plen == 128)
1109 rt->u.dst.flags = DST_HOST;
1110
1111#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001112 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1113 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114#endif
1115
Thomas Graf86872cb2006-08-22 00:01:08 -07001116 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117
1118 /* We cannot add true routes via loopback here,
1119 they would result in kernel looping; promote them to reject routes
1120 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001121 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1123 /* hold loopback dev/idev if we haven't done so. */
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001124 if (dev != init_net.loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 if (dev) {
1126 dev_put(dev);
1127 in6_dev_put(idev);
1128 }
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001129 dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 dev_hold(dev);
1131 idev = in6_dev_get(dev);
1132 if (!idev) {
1133 err = -ENODEV;
1134 goto out;
1135 }
1136 }
1137 rt->u.dst.output = ip6_pkt_discard_out;
1138 rt->u.dst.input = ip6_pkt_discard;
1139 rt->u.dst.error = -ENETUNREACH;
1140 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1141 goto install_route;
1142 }
1143
Thomas Graf86872cb2006-08-22 00:01:08 -07001144 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 struct in6_addr *gw_addr;
1146 int gwa_type;
1147
Thomas Graf86872cb2006-08-22 00:01:08 -07001148 gw_addr = &cfg->fc_gateway;
1149 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 gwa_type = ipv6_addr_type(gw_addr);
1151
1152 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1153 struct rt6_info *grt;
1154
1155 /* IPv6 strictly inhibits using not link-local
1156 addresses as nexthop address.
1157 Otherwise, router will not able to send redirects.
1158 It is very good, but in some (rare!) circumstances
1159 (SIT, PtP, NBMA NOARP links) it is handy to allow
1160 some exceptions. --ANK
1161 */
1162 err = -EINVAL;
1163 if (!(gwa_type&IPV6_ADDR_UNICAST))
1164 goto out;
1165
Daniel Lezcano606a2b42008-03-04 13:45:59 -08001166 grt = rt6_lookup(&init_net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
1168 err = -EHOSTUNREACH;
1169 if (grt == NULL)
1170 goto out;
1171 if (dev) {
1172 if (dev != grt->rt6i_dev) {
1173 dst_release(&grt->u.dst);
1174 goto out;
1175 }
1176 } else {
1177 dev = grt->rt6i_dev;
1178 idev = grt->rt6i_idev;
1179 dev_hold(dev);
1180 in6_dev_hold(grt->rt6i_idev);
1181 }
1182 if (!(grt->rt6i_flags&RTF_GATEWAY))
1183 err = 0;
1184 dst_release(&grt->u.dst);
1185
1186 if (err)
1187 goto out;
1188 }
1189 err = -EINVAL;
1190 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1191 goto out;
1192 }
1193
1194 err = -ENODEV;
1195 if (dev == NULL)
1196 goto out;
1197
Thomas Graf86872cb2006-08-22 00:01:08 -07001198 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1200 if (IS_ERR(rt->rt6i_nexthop)) {
1201 err = PTR_ERR(rt->rt6i_nexthop);
1202 rt->rt6i_nexthop = NULL;
1203 goto out;
1204 }
1205 }
1206
Thomas Graf86872cb2006-08-22 00:01:08 -07001207 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
1209install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001210 if (cfg->fc_mx) {
1211 struct nlattr *nla;
1212 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001215 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001216
1217 if (type) {
1218 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 err = -EINVAL;
1220 goto out;
1221 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001222
1223 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 }
1226 }
1227
1228 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1229 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1230 if (!rt->u.dst.metrics[RTAX_MTU-1])
1231 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1232 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1233 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1234 rt->u.dst.dev = dev;
1235 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001236 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001237
1238 cfg->fc_nlinfo.nl_net = dev->nd_net;
1239
Thomas Graf86872cb2006-08-22 00:01:08 -07001240 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241
1242out:
1243 if (dev)
1244 dev_put(dev);
1245 if (idev)
1246 in6_dev_put(idev);
1247 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001248 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 return err;
1250}
1251
Thomas Graf86872cb2006-08-22 00:01:08 -07001252static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253{
1254 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001255 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
Patrick McHardy6c813a72006-08-06 22:22:47 -07001257 if (rt == &ip6_null_entry)
1258 return -ENOENT;
1259
Thomas Grafc71099a2006-08-04 23:20:06 -07001260 table = rt->rt6i_table;
1261 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
Thomas Graf86872cb2006-08-22 00:01:08 -07001263 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 dst_release(&rt->u.dst);
1265
Thomas Grafc71099a2006-08-04 23:20:06 -07001266 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267
1268 return err;
1269}
1270
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001271int ip6_del_rt(struct rt6_info *rt)
1272{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001273 struct nl_info info = {
1274 .nl_net = &init_net,
1275 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001276 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001277}
1278
Thomas Graf86872cb2006-08-22 00:01:08 -07001279static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280{
Thomas Grafc71099a2006-08-04 23:20:06 -07001281 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 struct fib6_node *fn;
1283 struct rt6_info *rt;
1284 int err = -ESRCH;
1285
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001286 table = fib6_get_table(&init_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 if (table == NULL)
1288 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Thomas Grafc71099a2006-08-04 23:20:06 -07001290 read_lock_bh(&table->tb6_lock);
1291
1292 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001293 &cfg->fc_dst, cfg->fc_dst_len,
1294 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001295
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001297 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001298 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001300 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001302 if (cfg->fc_flags & RTF_GATEWAY &&
1303 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001305 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 continue;
1307 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001308 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309
Thomas Graf86872cb2006-08-22 00:01:08 -07001310 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 }
1312 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001313 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314
1315 return err;
1316}
1317
1318/*
1319 * Handle redirects
1320 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001321struct ip6rd_flowi {
1322 struct flowi fl;
1323 struct in6_addr gateway;
1324};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001326static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1327 struct flowi *fl,
1328 int flags)
1329{
1330 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1331 struct rt6_info *rt;
1332 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001333
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001335 * Get the "current" route for this destination and
1336 * check if the redirect has come from approriate router.
1337 *
1338 * RFC 2461 specifies that redirects should only be
1339 * accepted if they come from the nexthop to the target.
1340 * Due to the way the routes are chosen, this notion
1341 * is a bit fuzzy and one might need to check all possible
1342 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
Thomas Grafc71099a2006-08-04 23:20:06 -07001345 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001346 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001347restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001348 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001349 /*
1350 * Current route is on-link; redirect is always invalid.
1351 *
1352 * Seems, previous statement is not true. It could
1353 * be node, which looks for us as on-link (f.e. proxy ndisc)
1354 * But then router serving it might decide, that we should
1355 * know truth 8)8) --ANK (980726).
1356 */
1357 if (rt6_check_expired(rt))
1358 continue;
1359 if (!(rt->rt6i_flags & RTF_GATEWAY))
1360 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001361 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001362 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001363 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001364 continue;
1365 break;
1366 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001367
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001368 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001369 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001370 BACKTRACK(&fl->fl6_src);
1371out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001372 dst_hold(&rt->u.dst);
1373
1374 read_unlock_bh(&table->tb6_lock);
1375
1376 return rt;
1377};
1378
1379static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1380 struct in6_addr *src,
1381 struct in6_addr *gateway,
1382 struct net_device *dev)
1383{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001384 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001385 struct ip6rd_flowi rdfl = {
1386 .fl = {
1387 .oif = dev->ifindex,
1388 .nl_u = {
1389 .ip6_u = {
1390 .daddr = *dest,
1391 .saddr = *src,
1392 },
1393 },
1394 },
1395 .gateway = *gateway,
1396 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001397
1398 if (rt6_need_strict(dest))
1399 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001400
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001401 return (struct rt6_info *)fib6_rule_lookup(&init_net,
1402 (struct flowi *)&rdfl,
1403 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001404}
1405
1406void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1407 struct in6_addr *saddr,
1408 struct neighbour *neigh, u8 *lladdr, int on_link)
1409{
1410 struct rt6_info *rt, *nrt = NULL;
1411 struct netevent_redirect netevent;
1412
1413 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1414
1415 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416 if (net_ratelimit())
1417 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1418 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001419 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 }
1421
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 /*
1423 * We have finally decided to accept it.
1424 */
1425
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001426 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1428 NEIGH_UPDATE_F_OVERRIDE|
1429 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1430 NEIGH_UPDATE_F_ISROUTER))
1431 );
1432
1433 /*
1434 * Redirect received -> path was valid.
1435 * Look, redirects are sent only in response to data packets,
1436 * so that this nexthop apparently is reachable. --ANK
1437 */
1438 dst_confirm(&rt->u.dst);
1439
1440 /* Duplicate redirect: silently ignore. */
1441 if (neigh == rt->u.dst.neighbour)
1442 goto out;
1443
1444 nrt = ip6_rt_copy(rt);
1445 if (nrt == NULL)
1446 goto out;
1447
1448 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1449 if (on_link)
1450 nrt->rt6i_flags &= ~RTF_GATEWAY;
1451
1452 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1453 nrt->rt6i_dst.plen = 128;
1454 nrt->u.dst.flags |= DST_HOST;
1455
1456 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1457 nrt->rt6i_nexthop = neigh_clone(neigh);
1458 /* Reset pmtu, it may be better */
1459 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1460 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1461
Thomas Graf40e22e82006-08-22 00:00:45 -07001462 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 goto out;
1464
Tom Tucker8d717402006-07-30 20:43:36 -07001465 netevent.old = &rt->u.dst;
1466 netevent.new = &nrt->u.dst;
1467 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1468
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001470 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return;
1472 }
1473
1474out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001475 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 return;
1477}
1478
1479/*
1480 * Handle ICMP "packet too big" messages
1481 * i.e. Path MTU discovery
1482 */
1483
1484void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1485 struct net_device *dev, u32 pmtu)
1486{
1487 struct rt6_info *rt, *nrt;
1488 int allfrag = 0;
1489
Daniel Lezcano606a2b42008-03-04 13:45:59 -08001490 rt = rt6_lookup(dev->nd_net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 if (rt == NULL)
1492 return;
1493
1494 if (pmtu >= dst_mtu(&rt->u.dst))
1495 goto out;
1496
1497 if (pmtu < IPV6_MIN_MTU) {
1498 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001499 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 * MTU (1280) and a fragment header should always be included
1501 * after a node receiving Too Big message reporting PMTU is
1502 * less than the IPv6 Minimum Link MTU.
1503 */
1504 pmtu = IPV6_MIN_MTU;
1505 allfrag = 1;
1506 }
1507
1508 /* New mtu received -> path was valid.
1509 They are sent only in response to data packets,
1510 so that this nexthop apparently is reachable. --ANK
1511 */
1512 dst_confirm(&rt->u.dst);
1513
1514 /* Host route. If it is static, it would be better
1515 not to override it, but add new one, so that
1516 when cache entry will expire old pmtu
1517 would return automatically.
1518 */
1519 if (rt->rt6i_flags & RTF_CACHE) {
1520 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1521 if (allfrag)
1522 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano49905092008-01-10 03:01:01 -08001523 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1525 goto out;
1526 }
1527
1528 /* Network route.
1529 Two cases are possible:
1530 1. It is connected route. Action: COW
1531 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1532 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001533 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001534 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001535 else
1536 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001537
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001538 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001539 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1540 if (allfrag)
1541 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1542
1543 /* According to RFC 1981, detecting PMTU increase shouldn't be
1544 * happened within 5 mins, the recommended timer is 10 mins.
1545 * Here this route expiration time is set to ip6_rt_mtu_expires
1546 * which is 10 mins. After 10 mins the decreased pmtu is expired
1547 * and detecting PMTU increase will be automatically happened.
1548 */
Daniel Lezcano49905092008-01-10 03:01:01 -08001549 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001550 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1551
Thomas Graf40e22e82006-08-22 00:00:45 -07001552 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554out:
1555 dst_release(&rt->u.dst);
1556}
1557
1558/*
1559 * Misc support functions
1560 */
1561
1562static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1563{
1564 struct rt6_info *rt = ip6_dst_alloc();
1565
1566 if (rt) {
1567 rt->u.dst.input = ort->u.dst.input;
1568 rt->u.dst.output = ort->u.dst.output;
1569
1570 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001571 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 rt->u.dst.dev = ort->u.dst.dev;
1573 if (rt->u.dst.dev)
1574 dev_hold(rt->u.dst.dev);
1575 rt->rt6i_idev = ort->rt6i_idev;
1576 if (rt->rt6i_idev)
1577 in6_dev_hold(rt->rt6i_idev);
1578 rt->u.dst.lastuse = jiffies;
1579 rt->rt6i_expires = 0;
1580
1581 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1582 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1583 rt->rt6i_metric = 0;
1584
1585 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1586#ifdef CONFIG_IPV6_SUBTREES
1587 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1588#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001589 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 }
1591 return rt;
1592}
1593
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001594#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001595static struct rt6_info *rt6_get_route_info(struct net *net,
1596 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001597 struct in6_addr *gwaddr, int ifindex)
1598{
1599 struct fib6_node *fn;
1600 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001601 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001602
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001603 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001604 if (table == NULL)
1605 return NULL;
1606
1607 write_lock_bh(&table->tb6_lock);
1608 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001609 if (!fn)
1610 goto out;
1611
Eric Dumazet7cc48262007-02-09 16:22:57 -08001612 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001613 if (rt->rt6i_dev->ifindex != ifindex)
1614 continue;
1615 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1616 continue;
1617 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1618 continue;
1619 dst_hold(&rt->u.dst);
1620 break;
1621 }
1622out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001623 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001624 return rt;
1625}
1626
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001627static struct rt6_info *rt6_add_route_info(struct net *net,
1628 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001629 struct in6_addr *gwaddr, int ifindex,
1630 unsigned pref)
1631{
Thomas Graf86872cb2006-08-22 00:01:08 -07001632 struct fib6_config cfg = {
1633 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001634 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001635 .fc_ifindex = ifindex,
1636 .fc_dst_len = prefixlen,
1637 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1638 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001639 .fc_nlinfo.pid = 0,
1640 .fc_nlinfo.nlh = NULL,
1641 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001642 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001643
Thomas Graf86872cb2006-08-22 00:01:08 -07001644 ipv6_addr_copy(&cfg.fc_dst, prefix);
1645 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1646
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001647 /* We should treat it as a default route if prefix length is 0. */
1648 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001649 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001650
Thomas Graf86872cb2006-08-22 00:01:08 -07001651 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001652
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001653 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001654}
1655#endif
1656
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001658{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001660 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001662 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001663 if (table == NULL)
1664 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665
Thomas Grafc71099a2006-08-04 23:20:06 -07001666 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001667 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001669 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1671 break;
1672 }
1673 if (rt)
1674 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001675 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 return rt;
1677}
1678
Fred L. Templinc7dc89c2007-11-29 22:11:40 +11001679EXPORT_SYMBOL(rt6_get_dflt_router);
1680
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001682 struct net_device *dev,
1683 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684{
Thomas Graf86872cb2006-08-22 00:01:08 -07001685 struct fib6_config cfg = {
1686 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001687 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001688 .fc_ifindex = dev->ifindex,
1689 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1690 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1691 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
Thomas Graf86872cb2006-08-22 00:01:08 -07001693 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694
Thomas Graf86872cb2006-08-22 00:01:08 -07001695 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 return rt6_get_dflt_router(gwaddr, dev);
1698}
1699
1700void rt6_purge_dflt_routers(void)
1701{
1702 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001703 struct fib6_table *table;
1704
1705 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001706 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001707 if (table == NULL)
1708 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709
1710restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001711 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001712 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1714 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001715 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001716 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 goto restart;
1718 }
1719 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001720 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721}
1722
Thomas Graf86872cb2006-08-22 00:01:08 -07001723static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1724 struct fib6_config *cfg)
1725{
1726 memset(cfg, 0, sizeof(*cfg));
1727
1728 cfg->fc_table = RT6_TABLE_MAIN;
1729 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1730 cfg->fc_metric = rtmsg->rtmsg_metric;
1731 cfg->fc_expires = rtmsg->rtmsg_info;
1732 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1733 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1734 cfg->fc_flags = rtmsg->rtmsg_flags;
1735
Benjamin Theryf1243c22008-02-26 18:10:03 -08001736 cfg->fc_nlinfo.nl_net = &init_net;
1737
Thomas Graf86872cb2006-08-22 00:01:08 -07001738 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1739 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1740 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1741}
1742
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1744{
Thomas Graf86872cb2006-08-22 00:01:08 -07001745 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 struct in6_rtmsg rtmsg;
1747 int err;
1748
1749 switch(cmd) {
1750 case SIOCADDRT: /* Add a route */
1751 case SIOCDELRT: /* Delete a route */
1752 if (!capable(CAP_NET_ADMIN))
1753 return -EPERM;
1754 err = copy_from_user(&rtmsg, arg,
1755 sizeof(struct in6_rtmsg));
1756 if (err)
1757 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001758
1759 rtmsg_to_fib6_config(&rtmsg, &cfg);
1760
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 rtnl_lock();
1762 switch (cmd) {
1763 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001764 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 break;
1766 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001767 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 break;
1769 default:
1770 err = -EINVAL;
1771 }
1772 rtnl_unlock();
1773
1774 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001775 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776
1777 return -EINVAL;
1778}
1779
1780/*
1781 * Drop the packet on the floor
1782 */
1783
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001784static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001786 int type;
1787 switch (ipstats_mib_noroutes) {
1788 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001789 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001790 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1791 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1792 break;
1793 }
1794 /* FALLTHROUGH */
1795 case IPSTATS_MIB_OUTNOROUTES:
1796 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1797 break;
1798 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001799 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 kfree_skb(skb);
1801 return 0;
1802}
1803
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001804static int ip6_pkt_discard(struct sk_buff *skb)
1805{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001806 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001807}
1808
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001809static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810{
1811 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001812 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813}
1814
David S. Miller6723ab52006-10-18 21:20:57 -07001815#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1816
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001817static int ip6_pkt_prohibit(struct sk_buff *skb)
1818{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001819 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001820}
1821
1822static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1823{
1824 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001825 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001826}
1827
David S. Miller6723ab52006-10-18 21:20:57 -07001828#endif
1829
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830/*
1831 * Allocate a dst for local (unicast / anycast) address.
1832 */
1833
1834struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1835 const struct in6_addr *addr,
1836 int anycast)
1837{
1838 struct rt6_info *rt = ip6_dst_alloc();
1839
1840 if (rt == NULL)
1841 return ERR_PTR(-ENOMEM);
1842
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001843 dev_hold(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844 in6_dev_hold(idev);
1845
1846 rt->u.dst.flags = DST_HOST;
1847 rt->u.dst.input = ip6_input;
1848 rt->u.dst.output = ip6_output;
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001849 rt->rt6i_dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 rt->rt6i_idev = idev;
1851 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1852 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1853 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1854 rt->u.dst.obsolete = -1;
1855
1856 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001857 if (anycast)
1858 rt->rt6i_flags |= RTF_ANYCAST;
1859 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 rt->rt6i_flags |= RTF_LOCAL;
1861 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1862 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001863 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 return ERR_PTR(-ENOMEM);
1865 }
1866
1867 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1868 rt->rt6i_dst.plen = 128;
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001869 rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870
1871 atomic_set(&rt->u.dst.__refcnt, 1);
1872
1873 return rt;
1874}
1875
1876static int fib6_ifdown(struct rt6_info *rt, void *arg)
1877{
1878 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1879 rt != &ip6_null_entry) {
1880 RT6_TRACE("deleted by ifdown %p\n", rt);
1881 return -1;
1882 }
1883 return 0;
1884}
1885
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001886void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001888 fib6_clean_all(net, fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889}
1890
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1896
1897static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1898{
1899 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1900 struct inet6_dev *idev;
1901
1902 /* In IPv6 pmtu discovery is not optional,
1903 so that RTAX_MTU lock cannot disable it.
1904 We still use this lock to block changes
1905 caused by addrconf/ndisc.
1906 */
1907
1908 idev = __in6_dev_get(arg->dev);
1909 if (idev == NULL)
1910 return 0;
1911
1912 /* For administrative MTU increase, there is no way to discover
1913 IPv6 PMTU increase, so PMTU increase should be updated here.
1914 Since RFC 1981 doesn't include administrative MTU increase
1915 update PMTU increase is a MUST. (i.e. jumbo frame)
1916 */
1917 /*
1918 If new MTU is less than route PMTU, this new MTU will be the
1919 lowest MTU in the path, update the route PMTU to reflect PMTU
1920 decreases; if new MTU is greater than route PMTU, and the
1921 old MTU is the lowest MTU in the path, update the route PMTU
1922 to reflect the increase. In this case if the other nodes' MTU
1923 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1924 PMTU discouvery.
1925 */
1926 if (rt->rt6i_dev == arg->dev &&
1927 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001928 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001929 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001930 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001932 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1933 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return 0;
1935}
1936
1937void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1938{
Thomas Grafc71099a2006-08-04 23:20:06 -07001939 struct rt6_mtu_change_arg arg = {
1940 .dev = dev,
1941 .mtu = mtu,
1942 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001944 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945}
1946
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001947static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001948 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001949 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001950 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001951 [RTA_PRIORITY] = { .type = NLA_U32 },
1952 [RTA_METRICS] = { .type = NLA_NESTED },
1953};
1954
1955static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1956 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957{
Thomas Graf86872cb2006-08-22 00:01:08 -07001958 struct rtmsg *rtm;
1959 struct nlattr *tb[RTA_MAX+1];
1960 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961
Thomas Graf86872cb2006-08-22 00:01:08 -07001962 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1963 if (err < 0)
1964 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965
Thomas Graf86872cb2006-08-22 00:01:08 -07001966 err = -EINVAL;
1967 rtm = nlmsg_data(nlh);
1968 memset(cfg, 0, sizeof(*cfg));
1969
1970 cfg->fc_table = rtm->rtm_table;
1971 cfg->fc_dst_len = rtm->rtm_dst_len;
1972 cfg->fc_src_len = rtm->rtm_src_len;
1973 cfg->fc_flags = RTF_UP;
1974 cfg->fc_protocol = rtm->rtm_protocol;
1975
1976 if (rtm->rtm_type == RTN_UNREACHABLE)
1977 cfg->fc_flags |= RTF_REJECT;
1978
1979 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1980 cfg->fc_nlinfo.nlh = nlh;
Benjamin Thery2216b482008-01-30 19:09:35 -08001981 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07001982
1983 if (tb[RTA_GATEWAY]) {
1984 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1985 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001987
1988 if (tb[RTA_DST]) {
1989 int plen = (rtm->rtm_dst_len + 7) >> 3;
1990
1991 if (nla_len(tb[RTA_DST]) < plen)
1992 goto errout;
1993
1994 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001996
1997 if (tb[RTA_SRC]) {
1998 int plen = (rtm->rtm_src_len + 7) >> 3;
1999
2000 if (nla_len(tb[RTA_SRC]) < plen)
2001 goto errout;
2002
2003 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002005
2006 if (tb[RTA_OIF])
2007 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2008
2009 if (tb[RTA_PRIORITY])
2010 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2011
2012 if (tb[RTA_METRICS]) {
2013 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2014 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002016
2017 if (tb[RTA_TABLE])
2018 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2019
2020 err = 0;
2021errout:
2022 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023}
2024
Thomas Grafc127ea22007-03-22 11:58:32 -07002025static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002027 struct net *net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002028 struct fib6_config cfg;
2029 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030
Denis V. Lunevb8542722007-12-01 00:21:31 +11002031 if (net != &init_net)
2032 return -EINVAL;
2033
Thomas Graf86872cb2006-08-22 00:01:08 -07002034 err = rtm_to_fib6_config(skb, nlh, &cfg);
2035 if (err < 0)
2036 return err;
2037
2038 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039}
2040
Thomas Grafc127ea22007-03-22 11:58:32 -07002041static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002043 struct net *net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002044 struct fib6_config cfg;
2045 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046
Denis V. Lunevb8542722007-12-01 00:21:31 +11002047 if (net != &init_net)
2048 return -EINVAL;
2049
Thomas Graf86872cb2006-08-22 00:01:08 -07002050 err = rtm_to_fib6_config(skb, nlh, &cfg);
2051 if (err < 0)
2052 return err;
2053
2054 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055}
2056
Thomas Graf339bf982006-11-10 14:10:15 -08002057static inline size_t rt6_nlmsg_size(void)
2058{
2059 return NLMSG_ALIGN(sizeof(struct rtmsg))
2060 + nla_total_size(16) /* RTA_SRC */
2061 + nla_total_size(16) /* RTA_DST */
2062 + nla_total_size(16) /* RTA_GATEWAY */
2063 + nla_total_size(16) /* RTA_PREFSRC */
2064 + nla_total_size(4) /* RTA_TABLE */
2065 + nla_total_size(4) /* RTA_IIF */
2066 + nla_total_size(4) /* RTA_OIF */
2067 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002068 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002069 + nla_total_size(sizeof(struct rta_cacheinfo));
2070}
2071
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002073 struct in6_addr *dst, struct in6_addr *src,
2074 int iif, int type, u32 pid, u32 seq,
2075 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076{
2077 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002078 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002079 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002080 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
2082 if (prefix) { /* user wants prefix routes only */
2083 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2084 /* success since this is not a prefix route */
2085 return 1;
2086 }
2087 }
2088
Thomas Graf2d7202b2006-08-22 00:01:27 -07002089 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2090 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002091 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002092
2093 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094 rtm->rtm_family = AF_INET6;
2095 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2096 rtm->rtm_src_len = rt->rt6i_src.plen;
2097 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002098 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002099 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002100 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002101 table = RT6_TABLE_UNSPEC;
2102 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002103 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 if (rt->rt6i_flags&RTF_REJECT)
2105 rtm->rtm_type = RTN_UNREACHABLE;
2106 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2107 rtm->rtm_type = RTN_LOCAL;
2108 else
2109 rtm->rtm_type = RTN_UNICAST;
2110 rtm->rtm_flags = 0;
2111 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2112 rtm->rtm_protocol = rt->rt6i_protocol;
2113 if (rt->rt6i_flags&RTF_DYNAMIC)
2114 rtm->rtm_protocol = RTPROT_REDIRECT;
2115 else if (rt->rt6i_flags & RTF_ADDRCONF)
2116 rtm->rtm_protocol = RTPROT_KERNEL;
2117 else if (rt->rt6i_flags&RTF_DEFAULT)
2118 rtm->rtm_protocol = RTPROT_RA;
2119
2120 if (rt->rt6i_flags&RTF_CACHE)
2121 rtm->rtm_flags |= RTM_F_CLONED;
2122
2123 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002124 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002125 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002127 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128#ifdef CONFIG_IPV6_SUBTREES
2129 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002130 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002131 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002133 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134#endif
2135 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002136 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137 else if (dst) {
2138 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002139 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2140 dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002141 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002143
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002145 goto nla_put_failure;
2146
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002148 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2149
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002151 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2152
2153 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002154
2155 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2156 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2157 expires, rt->u.dst.error) < 0)
2158 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159
Thomas Graf2d7202b2006-08-22 00:01:27 -07002160 return nlmsg_end(skb, nlh);
2161
2162nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002163 nlmsg_cancel(skb, nlh);
2164 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165}
2166
Patrick McHardy1b43af52006-08-10 23:11:17 -07002167int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168{
2169 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2170 int prefix;
2171
Thomas Graf2d7202b2006-08-22 00:01:27 -07002172 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2173 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2175 } else
2176 prefix = 0;
2177
2178 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2179 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002180 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181}
2182
Thomas Grafc127ea22007-03-22 11:58:32 -07002183static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002185 struct net *net = in_skb->sk->sk_net;
Thomas Grafab364a62006-08-22 00:01:47 -07002186 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002188 struct sk_buff *skb;
2189 struct rtmsg *rtm;
2190 struct flowi fl;
2191 int err, iif = 0;
2192
Denis V. Lunevb8542722007-12-01 00:21:31 +11002193 if (net != &init_net)
2194 return -EINVAL;
2195
Thomas Grafab364a62006-08-22 00:01:47 -07002196 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2197 if (err < 0)
2198 goto errout;
2199
2200 err = -EINVAL;
2201 memset(&fl, 0, sizeof(fl));
2202
2203 if (tb[RTA_SRC]) {
2204 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2205 goto errout;
2206
2207 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2208 }
2209
2210 if (tb[RTA_DST]) {
2211 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2212 goto errout;
2213
2214 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2215 }
2216
2217 if (tb[RTA_IIF])
2218 iif = nla_get_u32(tb[RTA_IIF]);
2219
2220 if (tb[RTA_OIF])
2221 fl.oif = nla_get_u32(tb[RTA_OIF]);
2222
2223 if (iif) {
2224 struct net_device *dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002225 dev = __dev_get_by_index(&init_net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002226 if (!dev) {
2227 err = -ENODEV;
2228 goto errout;
2229 }
2230 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231
2232 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002233 if (skb == NULL) {
2234 err = -ENOBUFS;
2235 goto errout;
2236 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237
2238 /* Reserve room for dummy headers, this skb can pass
2239 through good chunk of routing engine.
2240 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002241 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2243
Thomas Grafab364a62006-08-22 00:01:47 -07002244 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 skb->dst = &rt->u.dst;
2246
Thomas Grafab364a62006-08-22 00:01:47 -07002247 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002249 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002251 kfree_skb(skb);
2252 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253 }
2254
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08002255 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002256errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258}
2259
Thomas Graf86872cb2006-08-22 00:01:08 -07002260void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261{
2262 struct sk_buff *skb;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002263 u32 seq;
2264 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002266 err = -ENOBUFS;
2267 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002268
Thomas Graf339bf982006-11-10 14:10:15 -08002269 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002270 if (skb == NULL)
2271 goto errout;
2272
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002273 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2274 event, info->pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002275 if (err < 0) {
2276 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2277 WARN_ON(err == -EMSGSIZE);
2278 kfree_skb(skb);
2279 goto errout;
2280 }
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002281 err = rtnl_notify(skb, &init_net, info->pid,
2282 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002283errout:
2284 if (err < 0)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08002285 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286}
2287
2288/*
2289 * /proc
2290 */
2291
2292#ifdef CONFIG_PROC_FS
2293
2294#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2295
/*
 * NOTE(review): none of the /proc handlers visible in this part of the
 * file use this structure -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2304
2305static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2306{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002307 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002309 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2310 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311
2312#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002313 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2314 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002316 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317#endif
2318
2319 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002320 seq_printf(m, NIP6_SEQFMT,
2321 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002323 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002325 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2326 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2327 rt->u.dst.__use, rt->rt6i_flags,
2328 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 return 0;
2330}
2331
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002332static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002334 struct net *net = (struct net *)m->private;
2335 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002336 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337}
2338
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002339static int ipv6_route_open(struct inode *inode, struct file *file)
2340{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002341 struct net *net = get_proc_net(inode);
2342 if (!net)
2343 return -ENXIO;
2344 return single_open(file, ipv6_route_show, net);
2345}
2346
2347static int ipv6_route_release(struct inode *inode, struct file *file)
2348{
2349 struct seq_file *seq = file->private_data;
2350 struct net *net = seq->private;
2351 put_net(net);
2352 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002353}
2354
2355static const struct file_operations ipv6_route_proc_fops = {
2356 .owner = THIS_MODULE,
2357 .open = ipv6_route_open,
2358 .read = seq_read,
2359 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002360 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002361};
2362
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2364{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002365 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002367 net->ipv6.rt6_stats->fib_nodes,
2368 net->ipv6.rt6_stats->fib_route_nodes,
2369 net->ipv6.rt6_stats->fib_rt_alloc,
2370 net->ipv6.rt6_stats->fib_rt_entries,
2371 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryc5728722008-03-03 23:34:17 -08002372 atomic_read(&ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002373 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374
2375 return 0;
2376}
2377
2378static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2379{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002380 struct net *net = get_proc_net(inode);
2381 return single_open(file, rt6_stats_seq_show, net);
2382}
2383
2384static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2385{
2386 struct seq_file *seq = file->private_data;
2387 struct net *net = (struct net *)seq->private;
2388 put_net(net);
2389 return single_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390}
2391
Arjan van de Ven9a321442007-02-12 00:55:35 -08002392static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 .owner = THIS_MODULE,
2394 .open = rt6_stats_seq_open,
2395 .read = seq_read,
2396 .llseek = seq_lseek,
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002397 .release = rt6_stats_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398};
2399#endif /* CONFIG_PROC_FS */
2400
2401#ifdef CONFIG_SYSCTL
2402
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403static
2404int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2405 void __user *buffer, size_t *lenp, loff_t *ppos)
2406{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002407 struct net *net = current->nsproxy->net_ns;
2408 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 if (write) {
2410 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002411 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412 return 0;
2413 } else
2414 return -EINVAL;
2415}
2416
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002417ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002418 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002420 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002422 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002423 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 },
2425 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2427 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002428 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 .maxlen = sizeof(int),
2430 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002431 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 },
2433 {
2434 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2435 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002436 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 .maxlen = sizeof(int),
2438 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002439 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 },
2441 {
2442 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2443 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002444 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445 .maxlen = sizeof(int),
2446 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002447 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 .strategy = &sysctl_jiffies,
2449 },
2450 {
2451 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2452 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002453 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454 .maxlen = sizeof(int),
2455 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002456 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 .strategy = &sysctl_jiffies,
2458 },
2459 {
2460 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2461 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002462 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 .maxlen = sizeof(int),
2464 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002465 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 .strategy = &sysctl_jiffies,
2467 },
2468 {
2469 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2470 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002471 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472 .maxlen = sizeof(int),
2473 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002474 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 .strategy = &sysctl_jiffies,
2476 },
2477 {
2478 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2479 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002480 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 .maxlen = sizeof(int),
2482 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002483 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 .strategy = &sysctl_jiffies,
2485 },
2486 {
2487 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2488 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002489 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 .maxlen = sizeof(int),
2491 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002492 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 .strategy = &sysctl_jiffies,
2494 },
2495 {
2496 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2497 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002498 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 .maxlen = sizeof(int),
2500 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002501 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502 .strategy = &sysctl_ms_jiffies,
2503 },
2504 { .ctl_name = 0 }
2505};
2506
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002507struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2508{
2509 struct ctl_table *table;
2510
2511 table = kmemdup(ipv6_route_table_template,
2512 sizeof(ipv6_route_table_template),
2513 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002514
2515 if (table) {
2516 table[0].data = &net->ipv6.sysctl.flush_delay;
2517 /* table[1].data will be handled when we have
2518 routes per namespace */
2519 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2520 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2521 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2522 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2523 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2524 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2525 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2526 }
2527
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002528 return table;
2529}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530#endif
2531
/*
 * Per-namespace init hook: when CONFIG_PROC_FS is set, creates the
 * "ipv6_route" and "rt6_stats" proc entries for @net. The results of the
 * proc creation calls are not checked. Always returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif

	return 0;
}
2540
/*
 * Per-namespace exit hook: when CONFIG_PROC_FS is set, removes the proc
 * entries that ip6_route_net_init() created for @net.
 */
static void ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}
2548
2549static struct pernet_operations ip6_route_net_ops = {
2550 .init = ip6_route_net_init,
2551 .exit = ip6_route_net_exit,
2552};
2553
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002554int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002556 int ret;
2557
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002558 ip6_dst_ops.kmem_cachep =
2559 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Daniel Lezcanof845ab62007-12-07 00:45:16 -08002560 SLAB_HWCACHE_ALIGN, NULL);
2561 if (!ip6_dst_ops.kmem_cachep)
2562 return -ENOMEM;
2563
David S. Miller14e50e52007-05-24 18:17:54 -07002564 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2565
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002566 ret = fib6_init();
2567 if (ret)
2568 goto out_kmem_cache;
2569
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002570 ret = xfrm6_init();
2571 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002572 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002573
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002574 ret = fib6_rules_init();
2575 if (ret)
2576 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002577
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002578 ret = -ENOBUFS;
2579 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2580 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2581 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2582 goto fib6_rules_init;
2583
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002584 ret = register_pernet_subsys(&ip6_route_net_ops);
2585 if (ret)
2586 goto fib6_rules_init;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002587out:
2588 return ret;
2589
2590fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002591 fib6_rules_cleanup();
2592xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002593 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002594out_fib6_init:
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002595 rt6_ifdown(&init_net, NULL);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002596 fib6_gc_cleanup();
2597out_kmem_cache:
2598 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2599 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600}
2601
2602void ip6_route_cleanup(void)
2603{
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002604 unregister_pernet_subsys(&ip6_route_net_ops);
Thomas Graf101367c2006-08-04 03:39:02 -07002605 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606 xfrm6_fini();
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002607 rt6_ifdown(&init_net, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 fib6_gc_cleanup();
2609 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2610}