blob: 252d76199c41e7589b85416bef1145e407239126 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 (or higher) to get
 * tracing: RDBG() and RT6_TRACE() then expand to printk() calls.  At
 * lower levels both macros expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already have a
 * nexthop (or are RTF_NONEXTHOP) via rt6_alloc_clone(); when zero it
 * returns such routes as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700111 .local_out = __ip6_local_out,
Eric Dumazete2422972008-01-30 20:07:45 -0800112 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113};
114
David S. Miller14e50e52007-05-24 18:17:54 -0700115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800121 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Eric Dumazete2422972008-01-30 20:07:45 -0800125 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700126};
127
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800128static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 .u = {
130 .dst = {
131 .__refcnt = ATOMIC_INIT(1),
132 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 .obsolete = -1,
134 .error = -ENETUNREACH,
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
136 .input = ip6_pkt_discard,
137 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 }
139 },
140 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700141 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst carries
 * -EACCES and whose input/output handlers are ip6_pkt_prohibit() and
 * ip6_pkt_prohibit_out().
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst carries
 * -EINVAL and hands packets in both directions to dst_discard().
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
188
/*
 * Allocate a dst from @ops via dst_alloc() and return it viewed as an
 * rt6_info.  The result is whatever dst_alloc() returned, merely cast;
 * callers in this file check it for NULL.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	return (struct rt6_info *)dst_alloc(ops);
}
194
195static void ip6_dst_destroy(struct dst_entry *dst)
196{
197 struct rt6_info *rt = (struct rt6_info *)dst;
198 struct inet6_dev *idev = rt->rt6i_idev;
199
200 if (idev != NULL) {
201 rt->rt6i_idev = NULL;
202 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900203 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207 int how)
208{
209 struct rt6_info *rt = (struct rt6_info *)dst;
210 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800211 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900212 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800214 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
215 struct inet6_dev *loopback_idev =
216 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 if (loopback_idev != NULL) {
218 rt->rt6i_idev = loopback_idev;
219 in6_dev_put(idev);
220 }
221 }
222}
223
224static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225{
226 return (rt->rt6i_flags & RTF_EXPIRES &&
227 time_after(jiffies, rt->rt6i_expires));
228}
229
Thomas Grafc71099a2006-08-04 23:20:06 -0700230static inline int rt6_need_strict(struct in6_addr *daddr)
231{
232 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900233 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700234}
235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700237 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 */
239
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800240static inline struct rt6_info *rt6_device_match(struct net *net,
241 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900242 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700244 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245{
246 struct rt6_info *local = NULL;
247 struct rt6_info *sprt;
248
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900249 if (!oif && ipv6_addr_any(saddr))
250 goto out;
251
252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
253 struct net_device *dev = sprt->rt6i_dev;
254
255 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 if (dev->ifindex == oif)
257 return sprt;
258 if (dev->flags & IFF_LOOPBACK) {
259 if (sprt->rt6i_idev == NULL ||
260 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700261 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900263 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 local->rt6i_idev->dev->ifindex == oif))
265 continue;
266 }
267 local = sprt;
268 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900269 } else {
270 if (ipv6_chk_addr(net, saddr, dev,
271 flags & RT6_LOOKUP_F_IFACE))
272 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900274 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900276 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 if (local)
278 return local;
279
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700280 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800281 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900283out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 return rt;
285}
286
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a Neighbour Solicitation towards the nexthop of @rt when its
 * neighbour entry is not in a NUD_VALID state and at least
 * rtr_probe_interval (from the route's inet6_dev configuration) has
 * passed since the entry was last updated.  A NULL @rt, or a route
 * with no nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/*
		 * NOTE(review): neigh->updated is written while only the
		 * read side of neigh->lock is held -- confirm whether a
		 * write lock is needed here.
		 */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Solicit the neighbour on its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
/* Router reachability probing is compiled out: do nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700328 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Dave Jonesb6f99a22007-03-22 12:27:49 -0700336static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800352 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800354 } else
355 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return m;
357}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
361{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900363
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700364 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700370 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 return -1;
373 return m;
374}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
David S. Millerf11e6652007-03-24 20:36:25 -0700376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378{
David S. Millerf11e6652007-03-24 20:36:25 -0700379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 return match;
417}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418
David S. Millerf11e6652007-03-24 20:36:25 -0700419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800422 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800425 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
David S. Millerf11e6652007-03-24 20:36:25 -0700427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
David S. Millerf11e6652007-03-24 20:36:25 -0700431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
David S. Millerf11e6652007-03-24 20:36:25 -0700445 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800446 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900448 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800449 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450}
451
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt points at the option (interpreted as struct route_info), @len is
 * its length in bytes and @gwaddr is the gateway to use for the route.
 *
 * Returns -EINVAL if the option is too short, if its length and
 * prefix_len fields are inconsistent, or if the preference is
 * ICMPV6_ROUTER_PREF_INVALID; otherwise 0.
 *
 * On success an existing matching route is deleted when the lifetime is
 * zero, a new one is added when none exists and the lifetime is
 * non-zero, and otherwise the existing route's preference bits are
 * replaced.  The route's expiry is then set from the lifetime, or
 * cleared when the lifetime is not finite.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference held on rt from the lookup/add above. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
525
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Acts only when the enclosing function's "rt" is the null entry of
 * __net.  It then moves the enclosing function's "fn" to its parent,
 * or, when the parent has a subtree other than "fn", to the result of
 * a lookup for @saddr in that subtree.  This repeats until a node with
 * RTN_RTINFO is reached (jump to "restart") or the node is flagged
 * RTN_TL_ROOT (jump to "out").
 *
 * The expanding function must provide the variables "rt" and "fn" and
 * the labels "restart" and "out".
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700543
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800556 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800558 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700560 return rt;
561
562}
563
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 },
573 },
574 };
575 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577
Thomas Grafadaa70b2006-10-13 15:01:03 -0700578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return NULL;
590}
591
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900592EXPORT_SYMBOL(rt6_lookup);
593
Thomas Grafc71099a2006-08-04 23:20:06 -0700594/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
Thomas Graf86872cb2006-08-22 00:01:08 -0700600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700603 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700607 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
610 return err;
611}
612
Thomas Graf40e22e82006-08-22 00:00:45 -0700613int ip6_ins_rt(struct rt6_info *rt)
614{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800615 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900616 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800617 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800618 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700619}
620
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900641 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
David S. Miller14deae42009-01-04 16:04:39 -0800655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800685 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800687 return rt;
688}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705{
706 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700708 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700713 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800718restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700722 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800726 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800727 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800729 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700730 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800731
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 else {
735#if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737#else
738 goto out2;
739#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800741
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800742 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800743 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800744
745 dst_hold(&rt->u.dst);
746 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700747 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800748 if (!err)
749 goto out2;
750 }
751
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800767 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
773 return rt;
774}
775
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777 struct flowi *fl, int flags)
778{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780}
781
Thomas Grafc71099a2006-08-04 23:20:06 -0700782void ip6_route_input(struct sk_buff *skb)
783{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700784 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900785 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700794 },
795 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900796 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700797 .proto = iph->nexthdr,
798 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700801 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700802
Eric Dumazetadf30902009-06-02 05:19:30 +0000803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700804}
805
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 struct flowi *fl, int flags)
808{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800809 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700810}
811
Daniel Lezcano4591db42008-03-05 10:48:10 -0800812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700814{
815 int flags = 0;
816
Brian Haley6057fd72010-05-28 23:02:35 -0700817 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700818 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700819
Thomas Grafadaa70b2006-10-13 15:01:03 -0700820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000822 else if (sk)
823 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700824
Daniel Lezcano4591db42008-03-05 10:48:10 -0800825 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826}
827
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900828EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
David S. Miller14e50e52007-05-24 18:17:54 -0700830int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
831{
832 struct rt6_info *ort = (struct rt6_info *) *dstp;
833 struct rt6_info *rt = (struct rt6_info *)
834 dst_alloc(&ip6_dst_blackhole_ops);
835 struct dst_entry *new = NULL;
836
837 if (rt) {
838 new = &rt->u.dst;
839
840 atomic_set(&new->__refcnt, 1);
841 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800842 new->input = dst_discard;
843 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700844
845 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
846 new->dev = ort->u.dst.dev;
847 if (new->dev)
848 dev_hold(new->dev);
849 rt->rt6i_idev = ort->rt6i_idev;
850 if (rt->rt6i_idev)
851 in6_dev_hold(rt->rt6i_idev);
852 rt->rt6i_expires = 0;
853
854 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
855 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
856 rt->rt6i_metric = 0;
857
858 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
859#ifdef CONFIG_IPV6_SUBTREES
860 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
861#endif
862
863 dst_free(new);
864 }
865
866 dst_release(*dstp);
867 *dstp = new;
868 return (new ? 0 : -ENOMEM);
869}
870EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
871
/*
 *	Destination cache support functions
 */
875
876static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
877{
878 struct rt6_info *rt;
879
880 rt = (struct rt6_info *) dst;
881
Herbert Xu10414442010-03-18 23:00:22 +0000882 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 return dst;
884
885 return NULL;
886}
887
888static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
889{
890 struct rt6_info *rt = (struct rt6_info *) dst;
891
892 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000893 if (rt->rt6i_flags & RTF_CACHE) {
894 if (rt6_check_expired(rt)) {
895 ip6_del_rt(rt);
896 dst = NULL;
897 }
898 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000900 dst = NULL;
901 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000903 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904}
905
906static void ip6_link_failure(struct sk_buff *skb)
907{
908 struct rt6_info *rt;
909
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
Eric Dumazetadf30902009-06-02 05:19:30 +0000912 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 if (rt) {
914 if (rt->rt6i_flags&RTF_CACHE) {
915 dst_set_expires(&rt->u.dst, 0);
916 rt->rt6i_flags |= RTF_EXPIRES;
917 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
918 rt->rt6i_node->fn_sernum = -1;
919 }
920}
921
922static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
923{
924 struct rt6_info *rt6 = (struct rt6_info*)dst;
925
926 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
927 rt6->rt6i_flags |= RTF_MODIFIED;
928 if (mtu < IPV6_MIN_MTU) {
929 mtu = IPV6_MIN_MTU;
930 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
931 }
932 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700933 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 }
935}
936
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937static int ipv6_get_mtu(struct net_device *dev);
938
Daniel Lezcano55786892008-03-04 13:47:47 -0800939static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940{
941 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
942
Daniel Lezcano55786892008-03-04 13:47:47 -0800943 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
944 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945
946 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900947 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
948 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
949 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 * rely only on pmtu discovery"
951 */
952 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
953 mtu = IPV6_MAXPLEN;
954 return mtu;
955}
956
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800957static struct dst_entry *icmp6_dst_gc_list;
958static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700959
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800960struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900962 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963{
964 struct rt6_info *rt;
965 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900966 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967
968 if (unlikely(idev == NULL))
969 return NULL;
970
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000971 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 if (unlikely(rt == NULL)) {
973 in6_dev_put(idev);
974 goto out;
975 }
976
977 dev_hold(dev);
978 if (neigh)
979 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800980 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800982 if (IS_ERR(neigh))
983 neigh = NULL;
984 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
986 rt->rt6i_dev = dev;
987 rt->rt6i_idev = idev;
988 rt->rt6i_nexthop = neigh;
989 atomic_set(&rt->u.dst.__refcnt, 1);
990 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
991 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800992 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800993 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
995#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900996 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
997 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 : 0;
999 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1000 rt->rt6i_dst.plen = 128;
1001#endif
1002
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001003 spin_lock_bh(&icmp6_dst_lock);
1004 rt->u.dst.next = icmp6_dst_gc_list;
1005 icmp6_dst_gc_list = &rt->u.dst;
1006 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
Daniel Lezcano55786892008-03-04 13:47:47 -08001008 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
1010out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001011 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012}
1013
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001014int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015{
1016 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001017 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018
1019 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001020
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001021 spin_lock_bh(&icmp6_dst_lock);
1022 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001023
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 while ((dst = *pprev) != NULL) {
1025 if (!atomic_read(&dst->__refcnt)) {
1026 *pprev = dst->next;
1027 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 } else {
1029 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001030 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 }
1032 }
1033
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001034 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001035
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001036 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037}
1038
David S. Miller1e493d12008-09-10 17:27:15 -07001039static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1040 void *arg)
1041{
1042 struct dst_entry *dst, **pprev;
1043
1044 spin_lock_bh(&icmp6_dst_lock);
1045 pprev = &icmp6_dst_gc_list;
1046 while ((dst = *pprev) != NULL) {
1047 struct rt6_info *rt = (struct rt6_info *) dst;
1048 if (func(rt, arg)) {
1049 *pprev = dst->next;
1050 dst_free(dst);
1051 } else {
1052 pprev = &dst->next;
1053 }
1054 }
1055 spin_unlock_bh(&icmp6_dst_lock);
1056}
1057
Daniel Lezcano569d3642008-01-18 03:56:57 -08001058static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001061 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001062 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1063 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1064 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1065 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1066 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
Daniel Lezcano7019b782008-03-04 13:50:14 -08001068 if (time_after(rt_last_gc + rt_min_interval, now) &&
1069 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 goto out;
1071
Benjamin Thery6891a342008-03-04 13:49:47 -08001072 net->ipv6.ip6_rt_gc_expire++;
1073 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1074 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001075 if (atomic_read(&ops->entries) < ops->gc_thresh)
1076 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001078 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1079 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080}
1081
/* Clean the host part of a prefix. This is not necessary in a radix tree,
   but results in cleaner routing tables.

   Remove it only when everything else works!
 */
1087
1088static int ipv6_get_mtu(struct net_device *dev)
1089{
1090 int mtu = IPV6_MIN_MTU;
1091 struct inet6_dev *idev;
1092
1093 idev = in6_dev_get(dev);
1094 if (idev) {
1095 mtu = idev->cnf.mtu6;
1096 in6_dev_put(idev);
1097 }
1098 return mtu;
1099}
1100
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001101int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001103 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1104 if (hoplimit < 0) {
1105 struct net_device *dev = dst->dev;
1106 struct inet6_dev *idev = in6_dev_get(dev);
1107 if (idev) {
1108 hoplimit = idev->cnf.hop_limit;
1109 in6_dev_put(idev);
1110 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001111 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 }
1113 return hoplimit;
1114}
1115
1116/*
1117 *
1118 */
1119
Thomas Graf86872cb2006-08-22 00:01:08 -07001120int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121{
1122 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001123 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 struct rt6_info *rt = NULL;
1125 struct net_device *dev = NULL;
1126 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001127 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 int addr_type;
1129
Thomas Graf86872cb2006-08-22 00:01:08 -07001130 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 return -EINVAL;
1132#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001133 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 return -EINVAL;
1135#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001136 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001138 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 if (!dev)
1140 goto out;
1141 idev = in6_dev_get(dev);
1142 if (!idev)
1143 goto out;
1144 }
1145
Thomas Graf86872cb2006-08-22 00:01:08 -07001146 if (cfg->fc_metric == 0)
1147 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148
Daniel Lezcano55786892008-03-04 13:47:47 -08001149 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001150 if (table == NULL) {
1151 err = -ENOBUFS;
1152 goto out;
1153 }
1154
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001155 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
1157 if (rt == NULL) {
1158 err = -ENOMEM;
1159 goto out;
1160 }
1161
1162 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001163 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1164 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1165 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166
Thomas Graf86872cb2006-08-22 00:01:08 -07001167 if (cfg->fc_protocol == RTPROT_UNSPEC)
1168 cfg->fc_protocol = RTPROT_BOOT;
1169 rt->rt6i_protocol = cfg->fc_protocol;
1170
1171 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
1173 if (addr_type & IPV6_ADDR_MULTICAST)
1174 rt->u.dst.input = ip6_mc_input;
1175 else
1176 rt->u.dst.input = ip6_forward;
1177
1178 rt->u.dst.output = ip6_output;
1179
Thomas Graf86872cb2006-08-22 00:01:08 -07001180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 if (rt->rt6i_dst.plen == 128)
1183 rt->u.dst.flags = DST_HOST;
1184
1185#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1187 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188#endif
1189
Thomas Graf86872cb2006-08-22 00:01:08 -07001190 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191
1192 /* We cannot add true routes via loopback here,
1193 they would result in kernel looping; promote them to reject routes
1194 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001195 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1197 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001198 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 if (dev) {
1200 dev_put(dev);
1201 in6_dev_put(idev);
1202 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001203 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 dev_hold(dev);
1205 idev = in6_dev_get(dev);
1206 if (!idev) {
1207 err = -ENODEV;
1208 goto out;
1209 }
1210 }
1211 rt->u.dst.output = ip6_pkt_discard_out;
1212 rt->u.dst.input = ip6_pkt_discard;
1213 rt->u.dst.error = -ENETUNREACH;
1214 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1215 goto install_route;
1216 }
1217
Thomas Graf86872cb2006-08-22 00:01:08 -07001218 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 struct in6_addr *gw_addr;
1220 int gwa_type;
1221
Thomas Graf86872cb2006-08-22 00:01:08 -07001222 gw_addr = &cfg->fc_gateway;
1223 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 gwa_type = ipv6_addr_type(gw_addr);
1225
1226 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1227 struct rt6_info *grt;
1228
1229 /* IPv6 strictly inhibits using not link-local
1230 addresses as nexthop address.
1231 Otherwise, router will not able to send redirects.
1232 It is very good, but in some (rare!) circumstances
1233 (SIT, PtP, NBMA NOARP links) it is handy to allow
1234 some exceptions. --ANK
1235 */
1236 err = -EINVAL;
1237 if (!(gwa_type&IPV6_ADDR_UNICAST))
1238 goto out;
1239
Daniel Lezcano55786892008-03-04 13:47:47 -08001240 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241
1242 err = -EHOSTUNREACH;
1243 if (grt == NULL)
1244 goto out;
1245 if (dev) {
1246 if (dev != grt->rt6i_dev) {
1247 dst_release(&grt->u.dst);
1248 goto out;
1249 }
1250 } else {
1251 dev = grt->rt6i_dev;
1252 idev = grt->rt6i_idev;
1253 dev_hold(dev);
1254 in6_dev_hold(grt->rt6i_idev);
1255 }
1256 if (!(grt->rt6i_flags&RTF_GATEWAY))
1257 err = 0;
1258 dst_release(&grt->u.dst);
1259
1260 if (err)
1261 goto out;
1262 }
1263 err = -EINVAL;
1264 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1265 goto out;
1266 }
1267
1268 err = -ENODEV;
1269 if (dev == NULL)
1270 goto out;
1271
Thomas Graf86872cb2006-08-22 00:01:08 -07001272 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1274 if (IS_ERR(rt->rt6i_nexthop)) {
1275 err = PTR_ERR(rt->rt6i_nexthop);
1276 rt->rt6i_nexthop = NULL;
1277 goto out;
1278 }
1279 }
1280
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
1283install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 if (cfg->fc_mx) {
1285 struct nlattr *nla;
1286 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287
Thomas Graf86872cb2006-08-22 00:01:08 -07001288 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001289 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001290
1291 if (type) {
1292 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 err = -EINVAL;
1294 goto out;
1295 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001296
1297 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 }
1300 }
1301
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001302 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001304 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001306 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001307 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 rt->u.dst.dev = dev;
1309 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001310 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001311
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001312 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001313
Thomas Graf86872cb2006-08-22 00:01:08 -07001314 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
1316out:
1317 if (dev)
1318 dev_put(dev);
1319 if (idev)
1320 in6_dev_put(idev);
1321 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001322 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 return err;
1324}
1325
Thomas Graf86872cb2006-08-22 00:01:08 -07001326static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327{
1328 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001329 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001330 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001332 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001333 return -ENOENT;
1334
Thomas Grafc71099a2006-08-04 23:20:06 -07001335 table = rt->rt6i_table;
1336 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
Thomas Graf86872cb2006-08-22 00:01:08 -07001338 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 dst_release(&rt->u.dst);
1340
Thomas Grafc71099a2006-08-04 23:20:06 -07001341 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342
1343 return err;
1344}
1345
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001346int ip6_del_rt(struct rt6_info *rt)
1347{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001348 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001349 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001350 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001351 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001352}
1353
Thomas Graf86872cb2006-08-22 00:01:08 -07001354static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355{
Thomas Grafc71099a2006-08-04 23:20:06 -07001356 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 struct fib6_node *fn;
1358 struct rt6_info *rt;
1359 int err = -ESRCH;
1360
Daniel Lezcano55786892008-03-04 13:47:47 -08001361 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001362 if (table == NULL)
1363 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364
Thomas Grafc71099a2006-08-04 23:20:06 -07001365 read_lock_bh(&table->tb6_lock);
1366
1367 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001368 &cfg->fc_dst, cfg->fc_dst_len,
1369 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001370
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001372 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001373 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001375 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 if (cfg->fc_flags & RTF_GATEWAY &&
1378 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001380 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 continue;
1382 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001383 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384
Thomas Graf86872cb2006-08-22 00:01:08 -07001385 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 }
1387 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001388 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389
1390 return err;
1391}
1392
/*
 *	Handle redirects
 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001396struct ip6rd_flowi {
1397 struct flowi fl;
1398 struct in6_addr gateway;
1399};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001401static struct rt6_info *__ip6_route_redirect(struct net *net,
1402 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001403 struct flowi *fl,
1404 int flags)
1405{
1406 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1407 struct rt6_info *rt;
1408 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001409
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001411 * Get the "current" route for this destination and
1412 * check if the redirect has come from approriate router.
1413 *
1414 * RFC 2461 specifies that redirects should only be
1415 * accepted if they come from the nexthop to the target.
1416 * Due to the way the routes are chosen, this notion
1417 * is a bit fuzzy and one might need to check all possible
1418 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420
Thomas Grafc71099a2006-08-04 23:20:06 -07001421 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001422 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001423restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001424 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001425 /*
1426 * Current route is on-link; redirect is always invalid.
1427 *
1428 * Seems, previous statement is not true. It could
1429 * be node, which looks for us as on-link (f.e. proxy ndisc)
1430 * But then router serving it might decide, that we should
1431 * know truth 8)8) --ANK (980726).
1432 */
1433 if (rt6_check_expired(rt))
1434 continue;
1435 if (!(rt->rt6i_flags & RTF_GATEWAY))
1436 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001437 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001438 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001439 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001440 continue;
1441 break;
1442 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001443
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001444 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001445 rt = net->ipv6.ip6_null_entry;
1446 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001447out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001448 dst_hold(&rt->u.dst);
1449
1450 read_unlock_bh(&table->tb6_lock);
1451
1452 return rt;
1453};
1454
1455static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1456 struct in6_addr *src,
1457 struct in6_addr *gateway,
1458 struct net_device *dev)
1459{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001460 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001461 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001462 struct ip6rd_flowi rdfl = {
1463 .fl = {
1464 .oif = dev->ifindex,
1465 .nl_u = {
1466 .ip6_u = {
1467 .daddr = *dest,
1468 .saddr = *src,
1469 },
1470 },
1471 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001472 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001473
Brian Haley86c36ce2009-10-07 13:58:01 -07001474 ipv6_addr_copy(&rdfl.gateway, gateway);
1475
Thomas Grafadaa70b2006-10-13 15:01:03 -07001476 if (rt6_need_strict(dest))
1477 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001478
Daniel Lezcano55786892008-03-04 13:47:47 -08001479 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001480 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001481}
1482
1483void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1484 struct in6_addr *saddr,
1485 struct neighbour *neigh, u8 *lladdr, int on_link)
1486{
1487 struct rt6_info *rt, *nrt = NULL;
1488 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001489 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001490
1491 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1492
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001493 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 if (net_ratelimit())
1495 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1496 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001497 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 }
1499
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 /*
1501 * We have finally decided to accept it.
1502 */
1503
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001504 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1506 NEIGH_UPDATE_F_OVERRIDE|
1507 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1508 NEIGH_UPDATE_F_ISROUTER))
1509 );
1510
1511 /*
1512 * Redirect received -> path was valid.
1513 * Look, redirects are sent only in response to data packets,
1514 * so that this nexthop apparently is reachable. --ANK
1515 */
1516 dst_confirm(&rt->u.dst);
1517
1518 /* Duplicate redirect: silently ignore. */
1519 if (neigh == rt->u.dst.neighbour)
1520 goto out;
1521
1522 nrt = ip6_rt_copy(rt);
1523 if (nrt == NULL)
1524 goto out;
1525
1526 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1527 if (on_link)
1528 nrt->rt6i_flags &= ~RTF_GATEWAY;
1529
1530 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1531 nrt->rt6i_dst.plen = 128;
1532 nrt->u.dst.flags |= DST_HOST;
1533
1534 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1535 nrt->rt6i_nexthop = neigh_clone(neigh);
1536 /* Reset pmtu, it may be better */
1537 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001538 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001539 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540
Thomas Graf40e22e82006-08-22 00:00:45 -07001541 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 goto out;
1543
Tom Tucker8d717402006-07-30 20:43:36 -07001544 netevent.old = &rt->u.dst;
1545 netevent.new = &nrt->u.dst;
1546 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1547
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001549 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 return;
1551 }
1552
1553out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001554 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555}
1556
1557/*
1558 * Handle ICMP "packet too big" messages
1559 * i.e. Path MTU discovery
1560 */
1561
1562void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1563 struct net_device *dev, u32 pmtu)
1564{
1565 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001566 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 int allfrag = 0;
1568
Daniel Lezcano55786892008-03-04 13:47:47 -08001569 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 if (rt == NULL)
1571 return;
1572
1573 if (pmtu >= dst_mtu(&rt->u.dst))
1574 goto out;
1575
1576 if (pmtu < IPV6_MIN_MTU) {
1577 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001578 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 * MTU (1280) and a fragment header should always be included
1580 * after a node receiving Too Big message reporting PMTU is
1581 * less than the IPv6 Minimum Link MTU.
1582 */
1583 pmtu = IPV6_MIN_MTU;
1584 allfrag = 1;
1585 }
1586
1587 /* New mtu received -> path was valid.
1588 They are sent only in response to data packets,
1589 so that this nexthop apparently is reachable. --ANK
1590 */
1591 dst_confirm(&rt->u.dst);
1592
1593 /* Host route. If it is static, it would be better
1594 not to override it, but add new one, so that
1595 when cache entry will expire old pmtu
1596 would return automatically.
1597 */
1598 if (rt->rt6i_flags & RTF_CACHE) {
1599 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1600 if (allfrag)
1601 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001602 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1604 goto out;
1605 }
1606
1607 /* Network route.
1608 Two cases are possible:
1609 1. It is connected route. Action: COW
1610 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1611 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001612 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001613 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001614 else
1615 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001616
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001617 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001618 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1619 if (allfrag)
1620 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1621
1622 /* According to RFC 1981, detecting PMTU increase shouldn't be
1623 * happened within 5 mins, the recommended timer is 10 mins.
1624 * Here this route expiration time is set to ip6_rt_mtu_expires
1625 * which is 10 mins. After 10 mins the decreased pmtu is expired
1626 * and detecting PMTU increase will be automatically happened.
1627 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001628 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001629 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1630
Thomas Graf40e22e82006-08-22 00:00:45 -07001631 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633out:
1634 dst_release(&rt->u.dst);
1635}
1636
1637/*
1638 * Misc support functions
1639 */
1640
1641static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1642{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001643 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001644 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
1646 if (rt) {
1647 rt->u.dst.input = ort->u.dst.input;
1648 rt->u.dst.output = ort->u.dst.output;
1649
1650 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001651 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 rt->u.dst.dev = ort->u.dst.dev;
1653 if (rt->u.dst.dev)
1654 dev_hold(rt->u.dst.dev);
1655 rt->rt6i_idev = ort->rt6i_idev;
1656 if (rt->rt6i_idev)
1657 in6_dev_hold(rt->rt6i_idev);
1658 rt->u.dst.lastuse = jiffies;
1659 rt->rt6i_expires = 0;
1660
1661 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1662 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1663 rt->rt6i_metric = 0;
1664
1665 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1666#ifdef CONFIG_IPV6_SUBTREES
1667 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1668#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001669 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 }
1671 return rt;
1672}
1673
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001674#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001675static struct rt6_info *rt6_get_route_info(struct net *net,
1676 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001677 struct in6_addr *gwaddr, int ifindex)
1678{
1679 struct fib6_node *fn;
1680 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001681 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001682
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001683 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001684 if (table == NULL)
1685 return NULL;
1686
1687 write_lock_bh(&table->tb6_lock);
1688 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001689 if (!fn)
1690 goto out;
1691
Eric Dumazet7cc48262007-02-09 16:22:57 -08001692 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001693 if (rt->rt6i_dev->ifindex != ifindex)
1694 continue;
1695 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1696 continue;
1697 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1698 continue;
1699 dst_hold(&rt->u.dst);
1700 break;
1701 }
1702out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001703 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001704 return rt;
1705}
1706
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001707static struct rt6_info *rt6_add_route_info(struct net *net,
1708 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001709 struct in6_addr *gwaddr, int ifindex,
1710 unsigned pref)
1711{
Thomas Graf86872cb2006-08-22 00:01:08 -07001712 struct fib6_config cfg = {
1713 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001714 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001715 .fc_ifindex = ifindex,
1716 .fc_dst_len = prefixlen,
1717 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1718 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001719 .fc_nlinfo.pid = 0,
1720 .fc_nlinfo.nlh = NULL,
1721 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001722 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001723
Thomas Graf86872cb2006-08-22 00:01:08 -07001724 ipv6_addr_copy(&cfg.fc_dst, prefix);
1725 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1726
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001727 /* We should treat it as a default route if prefix length is 0. */
1728 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001729 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001730
Thomas Graf86872cb2006-08-22 00:01:08 -07001731 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001732
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001733 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001734}
1735#endif
1736
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001738{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001740 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001742 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001743 if (table == NULL)
1744 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745
Thomas Grafc71099a2006-08-04 23:20:06 -07001746 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001747 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001749 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1751 break;
1752 }
1753 if (rt)
1754 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001755 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756 return rt;
1757}
1758
1759struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001760 struct net_device *dev,
1761 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762{
Thomas Graf86872cb2006-08-22 00:01:08 -07001763 struct fib6_config cfg = {
1764 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001765 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001766 .fc_ifindex = dev->ifindex,
1767 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1768 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001769 .fc_nlinfo.pid = 0,
1770 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001771 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001772 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Thomas Graf86872cb2006-08-22 00:01:08 -07001774 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775
Thomas Graf86872cb2006-08-22 00:01:08 -07001776 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 return rt6_get_dflt_router(gwaddr, dev);
1779}
1780
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001781void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782{
1783 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001784 struct fib6_table *table;
1785
1786 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001787 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001788 if (table == NULL)
1789 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790
1791restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001792 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001793 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1795 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001796 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001797 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 goto restart;
1799 }
1800 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001801 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802}
1803
Daniel Lezcano55786892008-03-04 13:47:47 -08001804static void rtmsg_to_fib6_config(struct net *net,
1805 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001806 struct fib6_config *cfg)
1807{
1808 memset(cfg, 0, sizeof(*cfg));
1809
1810 cfg->fc_table = RT6_TABLE_MAIN;
1811 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1812 cfg->fc_metric = rtmsg->rtmsg_metric;
1813 cfg->fc_expires = rtmsg->rtmsg_info;
1814 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1815 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1816 cfg->fc_flags = rtmsg->rtmsg_flags;
1817
Daniel Lezcano55786892008-03-04 13:47:47 -08001818 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001819
Thomas Graf86872cb2006-08-22 00:01:08 -07001820 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1821 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1822 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1823}
1824
Daniel Lezcano55786892008-03-04 13:47:47 -08001825int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826{
Thomas Graf86872cb2006-08-22 00:01:08 -07001827 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 struct in6_rtmsg rtmsg;
1829 int err;
1830
1831 switch(cmd) {
1832 case SIOCADDRT: /* Add a route */
1833 case SIOCDELRT: /* Delete a route */
1834 if (!capable(CAP_NET_ADMIN))
1835 return -EPERM;
1836 err = copy_from_user(&rtmsg, arg,
1837 sizeof(struct in6_rtmsg));
1838 if (err)
1839 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001840
Daniel Lezcano55786892008-03-04 13:47:47 -08001841 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001842
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 rtnl_lock();
1844 switch (cmd) {
1845 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001846 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847 break;
1848 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 break;
1851 default:
1852 err = -EINVAL;
1853 }
1854 rtnl_unlock();
1855
1856 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001857 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858
1859 return -EINVAL;
1860}
1861
1862/*
1863 * Drop the packet on the floor
1864 */
1865
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001866static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001868 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001869 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001870 switch (ipstats_mib_noroutes) {
1871 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001872 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001873 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001874 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1875 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001876 break;
1877 }
1878 /* FALLTHROUGH */
1879 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001880 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1881 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001882 break;
1883 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001884 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 kfree_skb(skb);
1886 return 0;
1887}
1888
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001889static int ip6_pkt_discard(struct sk_buff *skb)
1890{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001891 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001892}
1893
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001894static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895{
Eric Dumazetadf30902009-06-02 05:19:30 +00001896 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001897 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898}
1899
David S. Miller6723ab52006-10-18 21:20:57 -07001900#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1901
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001902static int ip6_pkt_prohibit(struct sk_buff *skb)
1903{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001904 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001905}
1906
1907static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1908{
Eric Dumazetadf30902009-06-02 05:19:30 +00001909 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001910 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001911}
1912
David S. Miller6723ab52006-10-18 21:20:57 -07001913#endif
1914
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915/*
1916 * Allocate a dst for local (unicast / anycast) address.
1917 */
1918
1919struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1920 const struct in6_addr *addr,
1921 int anycast)
1922{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001923 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001924 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001925 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926
1927 if (rt == NULL)
1928 return ERR_PTR(-ENOMEM);
1929
Daniel Lezcano55786892008-03-04 13:47:47 -08001930 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 in6_dev_hold(idev);
1932
1933 rt->u.dst.flags = DST_HOST;
1934 rt->u.dst.input = ip6_input;
1935 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001936 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 rt->rt6i_idev = idev;
1938 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001939 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1941 rt->u.dst.obsolete = -1;
1942
1943 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001944 if (anycast)
1945 rt->rt6i_flags |= RTF_ANYCAST;
1946 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001948 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1949 if (IS_ERR(neigh)) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001950 dst_free(&rt->u.dst);
David S. Miller14deae42009-01-04 16:04:39 -08001951
1952 /* We are casting this because that is the return
1953 * value type. But an errno encoded pointer is the
1954 * same regardless of the underlying pointer type,
1955 * and that's what we are returning. So this is OK.
1956 */
1957 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 }
David S. Miller14deae42009-01-04 16:04:39 -08001959 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
1961 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1962 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001963 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964
1965 atomic_set(&rt->u.dst.__refcnt, 1);
1966
1967 return rt;
1968}
1969
/* Argument bundle handed to fib6_ifdown() through its void *arg. */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* supplies the null entry to skip */
};
1974
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975static int fib6_ifdown(struct rt6_info *rt, void *arg)
1976{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001977 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1978 struct net *net = ((struct arg_dev_net *)arg)->net;
1979
1980 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1981 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 RT6_TRACE("deleted by ifdown %p\n", rt);
1983 return -1;
1984 }
1985 return 0;
1986}
1987
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001988void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001990 struct arg_dev_net adn = {
1991 .dev = dev,
1992 .net = net,
1993 };
1994
1995 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001996 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997}
1998
/* Argument bundle handed to rt6_mtu_change_route() through its void *. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2004
2005static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2006{
2007 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2008 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002009 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010
2011 /* In IPv6 pmtu discovery is not optional,
2012 so that RTAX_MTU lock cannot disable it.
2013 We still use this lock to block changes
2014 caused by addrconf/ndisc.
2015 */
2016
2017 idev = __in6_dev_get(arg->dev);
2018 if (idev == NULL)
2019 return 0;
2020
2021 /* For administrative MTU increase, there is no way to discover
2022 IPv6 PMTU increase, so PMTU increase should be updated here.
2023 Since RFC 1981 doesn't include administrative MTU increase
2024 update PMTU increase is a MUST. (i.e. jumbo frame)
2025 */
2026 /*
2027 If new MTU is less than route PMTU, this new MTU will be the
2028 lowest MTU in the path, update the route PMTU to reflect PMTU
2029 decreases; if new MTU is greater than route PMTU, and the
2030 old MTU is the lowest MTU in the path, update the route PMTU
2031 to reflect the increase. In this case if the other nodes' MTU
2032 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2033 PMTU discouvery.
2034 */
2035 if (rt->rt6i_dev == arg->dev &&
2036 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08002037 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002038 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07002039 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002041 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002042 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 return 0;
2044}
2045
2046void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2047{
Thomas Grafc71099a2006-08-04 23:20:06 -07002048 struct rt6_mtu_change_arg arg = {
2049 .dev = dev,
2050 .mtu = mtu,
2051 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002053 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054}
2055
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002056static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002057 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002058 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002059 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002060 [RTA_PRIORITY] = { .type = NLA_U32 },
2061 [RTA_METRICS] = { .type = NLA_NESTED },
2062};
2063
2064static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2065 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066{
Thomas Graf86872cb2006-08-22 00:01:08 -07002067 struct rtmsg *rtm;
2068 struct nlattr *tb[RTA_MAX+1];
2069 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
Thomas Graf86872cb2006-08-22 00:01:08 -07002071 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2072 if (err < 0)
2073 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074
Thomas Graf86872cb2006-08-22 00:01:08 -07002075 err = -EINVAL;
2076 rtm = nlmsg_data(nlh);
2077 memset(cfg, 0, sizeof(*cfg));
2078
2079 cfg->fc_table = rtm->rtm_table;
2080 cfg->fc_dst_len = rtm->rtm_dst_len;
2081 cfg->fc_src_len = rtm->rtm_src_len;
2082 cfg->fc_flags = RTF_UP;
2083 cfg->fc_protocol = rtm->rtm_protocol;
2084
2085 if (rtm->rtm_type == RTN_UNREACHABLE)
2086 cfg->fc_flags |= RTF_REJECT;
2087
2088 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2089 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002090 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002091
2092 if (tb[RTA_GATEWAY]) {
2093 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2094 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002096
2097 if (tb[RTA_DST]) {
2098 int plen = (rtm->rtm_dst_len + 7) >> 3;
2099
2100 if (nla_len(tb[RTA_DST]) < plen)
2101 goto errout;
2102
2103 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002105
2106 if (tb[RTA_SRC]) {
2107 int plen = (rtm->rtm_src_len + 7) >> 3;
2108
2109 if (nla_len(tb[RTA_SRC]) < plen)
2110 goto errout;
2111
2112 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002114
2115 if (tb[RTA_OIF])
2116 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2117
2118 if (tb[RTA_PRIORITY])
2119 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2120
2121 if (tb[RTA_METRICS]) {
2122 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2123 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002125
2126 if (tb[RTA_TABLE])
2127 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2128
2129 err = 0;
2130errout:
2131 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132}
2133
Thomas Grafc127ea22007-03-22 11:58:32 -07002134static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135{
Thomas Graf86872cb2006-08-22 00:01:08 -07002136 struct fib6_config cfg;
2137 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138
Thomas Graf86872cb2006-08-22 00:01:08 -07002139 err = rtm_to_fib6_config(skb, nlh, &cfg);
2140 if (err < 0)
2141 return err;
2142
2143 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144}
2145
Thomas Grafc127ea22007-03-22 11:58:32 -07002146static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147{
Thomas Graf86872cb2006-08-22 00:01:08 -07002148 struct fib6_config cfg;
2149 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150
Thomas Graf86872cb2006-08-22 00:01:08 -07002151 err = rtm_to_fib6_config(skb, nlh, &cfg);
2152 if (err < 0)
2153 return err;
2154
2155 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156}
2157
Thomas Graf339bf982006-11-10 14:10:15 -08002158static inline size_t rt6_nlmsg_size(void)
2159{
2160 return NLMSG_ALIGN(sizeof(struct rtmsg))
2161 + nla_total_size(16) /* RTA_SRC */
2162 + nla_total_size(16) /* RTA_DST */
2163 + nla_total_size(16) /* RTA_GATEWAY */
2164 + nla_total_size(16) /* RTA_PREFSRC */
2165 + nla_total_size(4) /* RTA_TABLE */
2166 + nla_total_size(4) /* RTA_IIF */
2167 + nla_total_size(4) /* RTA_OIF */
2168 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002169 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002170 + nla_total_size(sizeof(struct rta_cacheinfo));
2171}
2172
Brian Haley191cd582008-08-14 15:33:21 -07002173static int rt6_fill_node(struct net *net,
2174 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002175 struct in6_addr *dst, struct in6_addr *src,
2176 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002177 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178{
2179 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002180 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002181 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002182 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183
2184 if (prefix) { /* user wants prefix routes only */
2185 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2186 /* success since this is not a prefix route */
2187 return 1;
2188 }
2189 }
2190
Thomas Graf2d7202b2006-08-22 00:01:27 -07002191 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2192 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002193 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002194
2195 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 rtm->rtm_family = AF_INET6;
2197 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2198 rtm->rtm_src_len = rt->rt6i_src.plen;
2199 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002200 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002201 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002202 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002203 table = RT6_TABLE_UNSPEC;
2204 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002205 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 if (rt->rt6i_flags&RTF_REJECT)
2207 rtm->rtm_type = RTN_UNREACHABLE;
2208 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2209 rtm->rtm_type = RTN_LOCAL;
2210 else
2211 rtm->rtm_type = RTN_UNICAST;
2212 rtm->rtm_flags = 0;
2213 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2214 rtm->rtm_protocol = rt->rt6i_protocol;
2215 if (rt->rt6i_flags&RTF_DYNAMIC)
2216 rtm->rtm_protocol = RTPROT_REDIRECT;
2217 else if (rt->rt6i_flags & RTF_ADDRCONF)
2218 rtm->rtm_protocol = RTPROT_KERNEL;
2219 else if (rt->rt6i_flags&RTF_DEFAULT)
2220 rtm->rtm_protocol = RTPROT_RA;
2221
2222 if (rt->rt6i_flags&RTF_CACHE)
2223 rtm->rtm_flags |= RTM_F_CLONED;
2224
2225 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002226 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002227 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002229 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230#ifdef CONFIG_IPV6_SUBTREES
2231 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002232 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002233 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002235 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002237 if (iif) {
2238#ifdef CONFIG_IPV6_MROUTE
2239 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002240 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002241 if (err <= 0) {
2242 if (!nowait) {
2243 if (err == 0)
2244 return 0;
2245 goto nla_put_failure;
2246 } else {
2247 if (err == -EMSGSIZE)
2248 goto nla_put_failure;
2249 }
2250 }
2251 } else
2252#endif
2253 NLA_PUT_U32(skb, RTA_IIF, iif);
2254 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002255 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002257 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002258 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002259 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002261
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002263 goto nla_put_failure;
2264
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002266 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2267
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002269 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2270
2271 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002272
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002273 if (!(rt->rt6i_flags & RTF_EXPIRES))
2274 expires = 0;
2275 else if (rt->rt6i_expires - jiffies < INT_MAX)
2276 expires = rt->rt6i_expires - jiffies;
2277 else
2278 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002279
Thomas Grafe3703b32006-11-27 09:27:07 -08002280 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2281 expires, rt->u.dst.error) < 0)
2282 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283
Thomas Graf2d7202b2006-08-22 00:01:27 -07002284 return nlmsg_end(skb, nlh);
2285
2286nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002287 nlmsg_cancel(skb, nlh);
2288 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289}
2290
Patrick McHardy1b43af52006-08-10 23:11:17 -07002291int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292{
2293 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2294 int prefix;
2295
Thomas Graf2d7202b2006-08-22 00:01:27 -07002296 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2297 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2299 } else
2300 prefix = 0;
2301
Brian Haley191cd582008-08-14 15:33:21 -07002302 return rt6_fill_node(arg->net,
2303 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002305 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306}
2307
Thomas Grafc127ea22007-03-22 11:58:32 -07002308static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002310 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002311 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002313 struct sk_buff *skb;
2314 struct rtmsg *rtm;
2315 struct flowi fl;
2316 int err, iif = 0;
2317
2318 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2319 if (err < 0)
2320 goto errout;
2321
2322 err = -EINVAL;
2323 memset(&fl, 0, sizeof(fl));
2324
2325 if (tb[RTA_SRC]) {
2326 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2327 goto errout;
2328
2329 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2330 }
2331
2332 if (tb[RTA_DST]) {
2333 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2334 goto errout;
2335
2336 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2337 }
2338
2339 if (tb[RTA_IIF])
2340 iif = nla_get_u32(tb[RTA_IIF]);
2341
2342 if (tb[RTA_OIF])
2343 fl.oif = nla_get_u32(tb[RTA_OIF]);
2344
2345 if (iif) {
2346 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002347 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002348 if (!dev) {
2349 err = -ENODEV;
2350 goto errout;
2351 }
2352 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353
2354 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002355 if (skb == NULL) {
2356 err = -ENOBUFS;
2357 goto errout;
2358 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359
2360 /* Reserve room for dummy headers, this skb can pass
2361 through good chunk of routing engine.
2362 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002363 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2365
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002366 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Eric Dumazetadf30902009-06-02 05:19:30 +00002367 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368
Brian Haley191cd582008-08-14 15:33:21 -07002369 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002371 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002373 kfree_skb(skb);
2374 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 }
2376
Daniel Lezcano55786892008-03-04 13:47:47 -08002377 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002378errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380}
2381
Thomas Graf86872cb2006-08-22 00:01:08 -07002382void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383{
2384 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002385 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002386 u32 seq;
2387 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002389 err = -ENOBUFS;
2390 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002391
Thomas Graf339bf982006-11-10 14:10:15 -08002392 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002393 if (skb == NULL)
2394 goto errout;
2395
Brian Haley191cd582008-08-14 15:33:21 -07002396 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002397 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002398 if (err < 0) {
2399 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2400 WARN_ON(err == -EMSGSIZE);
2401 kfree_skb(skb);
2402 goto errout;
2403 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002404 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2405 info->nlh, gfp_any());
2406 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002407errout:
2408 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002409 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410}
2411
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002412static int ip6_route_dev_notify(struct notifier_block *this,
2413 unsigned long event, void *data)
2414{
2415 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002416 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002417
2418 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2419 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2420 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2421#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2422 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2424 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2425 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2426#endif
2427 }
2428
2429 return NOTIFY_OK;
2430}
2431
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432/*
2433 * /proc
2434 */
2435
2436#ifdef CONFIG_PROC_FS
2437
2438#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2439
/*
 * Buffer-based /proc output state.
 * NOTE(review): no user of this struct is visible in this part of the
 * file (the /proc handlers below use seq_file) -- confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2448
2449static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2450{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002451 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002453 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454
2455#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002456 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002458 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459#endif
2460
2461 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002462 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002464 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002466 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2467 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2468 rt->u.dst.__use, rt->rt6i_flags,
2469 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 return 0;
2471}
2472
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002473static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002475 struct net *net = (struct net *)m->private;
2476 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002477 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478}
2479
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002480static int ipv6_route_open(struct inode *inode, struct file *file)
2481{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002482 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002483}
2484
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002485static const struct file_operations ipv6_route_proc_fops = {
2486 .owner = THIS_MODULE,
2487 .open = ipv6_route_open,
2488 .read = seq_read,
2489 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002490 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002491};
2492
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2494{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002495 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002497 net->ipv6.rt6_stats->fib_nodes,
2498 net->ipv6.rt6_stats->fib_route_nodes,
2499 net->ipv6.rt6_stats->fib_rt_alloc,
2500 net->ipv6.rt6_stats->fib_rt_entries,
2501 net->ipv6.rt6_stats->fib_rt_cache,
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002502 atomic_read(&net->ipv6.ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002503 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504
2505 return 0;
2506}
2507
2508static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2509{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002510 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002511}
2512
Arjan van de Ven9a321442007-02-12 00:55:35 -08002513static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514 .owner = THIS_MODULE,
2515 .open = rt6_stats_seq_open,
2516 .read = seq_read,
2517 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002518 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519};
2520#endif /* CONFIG_PROC_FS */
2521
2522#ifdef CONFIG_SYSCTL
2523
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002525int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 void __user *buffer, size_t *lenp, loff_t *ppos)
2527{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002528 struct net *net = current->nsproxy->net_ns;
2529 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002531 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002532 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 return 0;
2534 } else
2535 return -EINVAL;
2536}
2537
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002538ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002539 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002541 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002543 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002544 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 },
2546 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002548 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 .maxlen = sizeof(int),
2550 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002551 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 },
2553 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002555 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 .maxlen = sizeof(int),
2557 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002558 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 },
2560 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002562 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 .maxlen = sizeof(int),
2564 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002565 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 },
2567 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002569 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 .maxlen = sizeof(int),
2571 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002572 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573 },
2574 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002576 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 .maxlen = sizeof(int),
2578 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002579 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580 },
2581 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002583 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 .maxlen = sizeof(int),
2585 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002586 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587 },
2588 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002590 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 .maxlen = sizeof(int),
2592 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002593 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 },
2595 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002597 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 .maxlen = sizeof(int),
2599 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002600 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601 },
2602 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002604 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 .maxlen = sizeof(int),
2606 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002607 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002609 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610};
2611
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002612struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002613{
2614 struct ctl_table *table;
2615
2616 table = kmemdup(ipv6_route_table_template,
2617 sizeof(ipv6_route_table_template),
2618 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002619
2620 if (table) {
2621 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002622 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002623 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2624 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2625 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2626 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2627 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2628 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2629 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002630 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002631 }
2632
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002633 return table;
2634}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635#endif
2636
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002637static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002638{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002639 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002640
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002641 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2642 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002643
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002644 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2645 sizeof(*net->ipv6.ip6_null_entry),
2646 GFP_KERNEL);
2647 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002648 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002649 net->ipv6.ip6_null_entry->u.dst.path =
2650 (struct dst_entry *)net->ipv6.ip6_null_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002651 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002652
2653#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2654 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2655 sizeof(*net->ipv6.ip6_prohibit_entry),
2656 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002657 if (!net->ipv6.ip6_prohibit_entry)
2658 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002659 net->ipv6.ip6_prohibit_entry->u.dst.path =
2660 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002661 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002662
2663 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2664 sizeof(*net->ipv6.ip6_blk_hole_entry),
2665 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002666 if (!net->ipv6.ip6_blk_hole_entry)
2667 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002668 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2669 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002670 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002671#endif
2672
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002673 net->ipv6.sysctl.flush_delay = 0;
2674 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2675 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2676 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2677 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2678 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2679 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2680 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2681
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002682#ifdef CONFIG_PROC_FS
2683 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2684 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2685#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002686 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2687
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002688 ret = 0;
2689out:
2690 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002691
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002692#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693out_ip6_prohibit_entry:
2694 kfree(net->ipv6.ip6_prohibit_entry);
2695out_ip6_null_entry:
2696 kfree(net->ipv6.ip6_null_entry);
2697#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002698out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002699 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002700}
2701
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002702static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002703{
2704#ifdef CONFIG_PROC_FS
2705 proc_net_remove(net, "ipv6_route");
2706 proc_net_remove(net, "rt6_stats");
2707#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002708 kfree(net->ipv6.ip6_null_entry);
2709#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710 kfree(net->ipv6.ip6_prohibit_entry);
2711 kfree(net->ipv6.ip6_blk_hole_entry);
2712#endif
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002713}
2714
2715static struct pernet_operations ip6_route_net_ops = {
2716 .init = ip6_route_net_init,
2717 .exit = ip6_route_net_exit,
2718};
2719
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002720static struct notifier_block ip6_route_dev_notifier = {
2721 .notifier_call = ip6_route_dev_notify,
2722 .priority = 0,
2723};
2724
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002725int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002727 int ret;
2728
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002729 ret = -ENOMEM;
2730 ip6_dst_ops_template.kmem_cachep =
2731 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2732 SLAB_HWCACHE_ALIGN, NULL);
2733 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002734 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002735
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002736 ret = register_pernet_subsys(&ip6_route_net_ops);
2737 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002738 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002739
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002740 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2741
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002742 /* Registering of the loopback is done before this portion of code,
2743 * the loopback reference in rt6_info will not be taken, do it
2744 * manually for init_net */
2745 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2746 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2747 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2748 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2749 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2750 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2751 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2752 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002753 ret = fib6_init();
2754 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002755 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002756
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002757 ret = xfrm6_init();
2758 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002759 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002760
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002761 ret = fib6_rules_init();
2762 if (ret)
2763 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002764
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002765 ret = -ENOBUFS;
2766 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2767 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2768 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2769 goto fib6_rules_init;
2770
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002771 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002772 if (ret)
2773 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002774
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002775out:
2776 return ret;
2777
2778fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002779 fib6_rules_cleanup();
2780xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002781 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002782out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002783 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002784out_register_subsys:
2785 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002786out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002787 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002788 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789}
2790
2791void ip6_route_cleanup(void)
2792{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002793 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002794 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002796 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002797 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002798 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002799}