blob: 52cd3eff31dcccf7f79abed6d2860ceb73b09780 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  Raise RT6_DEBUG to 3 (or more) to turn
 * RDBG() and RT6_TRACE() into real printk() calls; at lower levels both
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG > 2
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/*
 * When non-zero, ip6_pol_route() clones a cache entry (rt6_alloc_clone)
 * for routes that do not go through rt6_alloc_cow(); when zero it returns
 * the matched route itself.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Route Information helpers used by rt6_route_rcv(); only declared when
 * CONFIG_IPV6_ROUTE_INFO is enabled.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800101 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Eric Dumazete2422972008-01-30 20:07:45 -0800111 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Eric Dumazete2422972008-01-30 20:07:45 -0800124 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700125};
126
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800127static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700140 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
Thomas Graf101367c2006-08-04 03:39:02 -0700145#ifdef CONFIG_IPV6_MULTIPLE_TABLES
146
David S. Miller6723ab52006-10-18 21:20:57 -0700147static int ip6_pkt_prohibit(struct sk_buff *skb);
148static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700149
Adrian Bunk280a34c2008-04-21 02:29:32 -0700150static struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700151 .u = {
152 .dst = {
153 .__refcnt = ATOMIC_INIT(1),
154 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700155 .obsolete = -1,
156 .error = -EACCES,
157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700158 .input = ip6_pkt_prohibit,
159 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700160 }
161 },
162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700163 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800168static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800176 .input = dst_discard,
177 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700181 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700182 .rt6i_metric = ~(u32) 0,
183 .rt6i_ref = ATOMIC_INIT(1),
184};
185
186#endif
187
/*
 * Allocate a dst entry from @ops via dst_alloc() and return it typed as a
 * struct rt6_info.  The result of dst_alloc() is passed through unchecked,
 * so callers must handle whatever failure value it yields.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800210 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900211 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
Thomas Grafc71099a2006-08-04 23:20:06 -0700229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700233}
234
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700236 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 */
238
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900241 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700260 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900262 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900275 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 if (local)
277 return local;
278
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700279 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800280 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900282out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 return rt;
284}
285
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the next
 * hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here a probe is sent only if more than
 * rt->rt6i_idev->cnf.rtr_probe_interval jiffies have passed since
 * neigh->updated.
 *
 * The rate-limit test and the update of neigh->updated are done under the
 * write side of neigh->lock.  Holding only the read side would let several
 * CPUs pass the test at once, all store to neigh->updated concurrently and
 * each send a probe, defeating the rate limit.
 *
 * @rt may be NULL (find_match() passes its current best match, which can
 * still be unset); a NULL route or a route without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the next hop via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700328 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Dave Jonesb6f99a22007-03-22 12:27:49 -0700336static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800352 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800354 } else
355 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return m;
357}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
361{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900363
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700364 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700370 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 return -1;
373 return m;
374}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
David S. Millerf11e6652007-03-24 20:36:25 -0700376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378{
David S. Millerf11e6652007-03-24 20:36:25 -0700379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 return match;
417}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418
David S. Millerf11e6652007-03-24 20:36:25 -0700419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800422 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800425 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
David S. Millerf11e6652007-03-24 20:36:25 -0700427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
David S. Millerf11e6652007-03-24 20:36:25 -0700431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
David S. Millerf11e6652007-03-24 20:36:25 -0700445 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800446 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900448 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800449 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450}
451
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - handle a Route Information option received on @dev.
 * @dev:	device the option arrived on
 * @opt:	start of the option (interpreted as struct route_info)
 * @len:	number of bytes available at @opt
 * @gwaddr:	address of the router that sent the option
 *
 * Validates the option, then adds, updates or (for a zero lifetime)
 * deletes the matching route-info route and sets its preference and
 * expiry.
 *
 * The option's Length field counts 8-octet units including the 8-octet
 * fixed header, so the option only carries
 *	Length 1: no prefix bytes
 *	Length 2: 8 prefix bytes  (prefix_len up to 64)
 *	Length 3: 16 prefix bytes (prefix_len up to 128)
 * The sanity check enforces exactly that (RFC 4191, section 2.3), so the
 * prefix is never read beyond the end of the option.
 *
 * Returns 0 on success (including when no route had to be changed) and
 * -EINVAL for a malformed option or an invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Reject negative lengths explicitly: the sizeof comparison is unsigned. */
	if (len < 0 || (size_t)len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* more than 8 prefix bytes needed: Length must be 3 */
		if (rinfo->length < 3) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		/* some prefix bytes needed: Length must be 2 or 3 */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Fewer than 16 prefix bytes present: build a full address
		 * from the first prefix_len bits.  The check above
		 * guarantees those bits lie inside the option.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			/* infinite lifetime: the route never expires */
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* presumably drops the reference returned by the get/add helper */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
525
/*
 * BACKTRACK - retry a lookup further up the tree after hitting the null
 * entry.
 *
 * Must be expanded inside a function that has locals 'rt' (current
 * result) and 'fn' (current fib node) and labels 'restart' and 'out'.
 * If 'rt' is the namespace's null entry, walk from 'fn' towards the root:
 * at each step either descend into the parent's subtree (looked up by
 * @saddr) when that subtree exists and is not where we came from, or move
 * to the parent.  Jump to 'restart' at the first node carrying route
 * information (RTN_RTINFO), or to 'out' once a top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700543
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800556 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800558 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700560 return rt;
561
562}
563
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 },
573 },
574 };
575 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577
Thomas Grafadaa70b2006-10-13 15:01:03 -0700578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return NULL;
590}
591
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900592EXPORT_SYMBOL(rt6_lookup);
593
Thomas Grafc71099a2006-08-04 23:20:06 -0700594/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
Thomas Graf86872cb2006-08-22 00:01:08 -0700600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700603 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700607 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
610 return err;
611}
612
Thomas Graf40e22e82006-08-22 00:00:45 -0700613int ip6_ins_rt(struct rt6_info *rt)
614{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800615 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900616 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800617 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800618 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700619}
620
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900641 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
David S. Miller14deae42009-01-04 16:04:39 -0800655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800685 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800687 return rt;
688}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705{
706 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700708 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700713 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800718restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700722 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800726 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800727 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800729 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700730 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800731
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 else {
735#if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737#else
738 goto out2;
739#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800741
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800742 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800743 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800744
745 dst_hold(&rt->u.dst);
746 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700747 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800748 if (!err)
749 goto out2;
750 }
751
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800767 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
773 return rt;
774}
775
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777 struct flowi *fl, int flags)
778{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780}
781
Thomas Grafc71099a2006-08-04 23:20:06 -0700782void ip6_route_input(struct sk_buff *skb)
783{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700784 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900785 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700794 },
795 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900796 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700797 .proto = iph->nexthdr,
798 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700801 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700802
Eric Dumazetadf30902009-06-02 05:19:30 +0000803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700804}
805
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 struct flowi *fl, int flags)
808{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800809 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700810}
811
Daniel Lezcano4591db42008-03-05 10:48:10 -0800812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700818 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700819
Thomas Grafadaa70b2006-10-13 15:01:03 -0700820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000822 else if (sk)
823 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700824
Daniel Lezcano4591db42008-03-05 10:48:10 -0800825 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826}
827
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900828EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
David S. Miller14e50e52007-05-24 18:17:54 -0700830int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
831{
832 struct rt6_info *ort = (struct rt6_info *) *dstp;
833 struct rt6_info *rt = (struct rt6_info *)
834 dst_alloc(&ip6_dst_blackhole_ops);
835 struct dst_entry *new = NULL;
836
837 if (rt) {
838 new = &rt->u.dst;
839
840 atomic_set(&new->__refcnt, 1);
841 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800842 new->input = dst_discard;
843 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700844
845 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
846 new->dev = ort->u.dst.dev;
847 if (new->dev)
848 dev_hold(new->dev);
849 rt->rt6i_idev = ort->rt6i_idev;
850 if (rt->rt6i_idev)
851 in6_dev_hold(rt->rt6i_idev);
852 rt->rt6i_expires = 0;
853
854 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
855 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
856 rt->rt6i_metric = 0;
857
858 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
859#ifdef CONFIG_IPV6_SUBTREES
860 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
861#endif
862
863 dst_free(new);
864 }
865
866 dst_release(*dstp);
867 *dstp = new;
868 return (new ? 0 : -ENOMEM);
869}
870EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
871
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872/*
873 * Destination cache support functions
874 */
875
876static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
877{
878 struct rt6_info *rt;
879
880 rt = (struct rt6_info *) dst;
881
882 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
883 return dst;
884
885 return NULL;
886}
887
888static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
889{
890 struct rt6_info *rt = (struct rt6_info *) dst;
891
892 if (rt) {
893 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700894 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 else
896 dst_release(dst);
897 }
898 return NULL;
899}
900
901static void ip6_link_failure(struct sk_buff *skb)
902{
903 struct rt6_info *rt;
904
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000905 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
Eric Dumazetadf30902009-06-02 05:19:30 +0000907 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 if (rt) {
909 if (rt->rt6i_flags&RTF_CACHE) {
910 dst_set_expires(&rt->u.dst, 0);
911 rt->rt6i_flags |= RTF_EXPIRES;
912 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
913 rt->rt6i_node->fn_sernum = -1;
914 }
915}
916
917static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
918{
919 struct rt6_info *rt6 = (struct rt6_info*)dst;
920
921 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
922 rt6->rt6i_flags |= RTF_MODIFIED;
923 if (mtu < IPV6_MIN_MTU) {
924 mtu = IPV6_MIN_MTU;
925 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
926 }
927 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700928 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 }
930}
931
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932static int ipv6_get_mtu(struct net_device *dev);
933
Daniel Lezcano55786892008-03-04 13:47:47 -0800934static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935{
936 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
937
Daniel Lezcano55786892008-03-04 13:47:47 -0800938 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
939 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940
941 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900942 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
943 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
944 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 * rely only on pmtu discovery"
946 */
947 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
948 mtu = IPV6_MAXPLEN;
949 return mtu;
950}
951
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800952static struct dst_entry *icmp6_dst_gc_list;
953static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700954
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800955struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900957 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958{
959 struct rt6_info *rt;
960 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900961 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962
963 if (unlikely(idev == NULL))
964 return NULL;
965
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000966 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 if (unlikely(rt == NULL)) {
968 in6_dev_put(idev);
969 goto out;
970 }
971
972 dev_hold(dev);
973 if (neigh)
974 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800975 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800977 if (IS_ERR(neigh))
978 neigh = NULL;
979 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980
981 rt->rt6i_dev = dev;
982 rt->rt6i_idev = idev;
983 rt->rt6i_nexthop = neigh;
984 atomic_set(&rt->u.dst.__refcnt, 1);
985 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
986 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800987 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800988 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989
990#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900991 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
992 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 : 0;
994 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
995 rt->rt6i_dst.plen = 128;
996#endif
997
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800998 spin_lock_bh(&icmp6_dst_lock);
999 rt->u.dst.next = icmp6_dst_gc_list;
1000 icmp6_dst_gc_list = &rt->u.dst;
1001 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002
Daniel Lezcano55786892008-03-04 13:47:47 -08001003 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004
1005out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001006 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007}
1008
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001009int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010{
1011 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001012 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013
1014 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001015
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001016 spin_lock_bh(&icmp6_dst_lock);
1017 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001018
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 while ((dst = *pprev) != NULL) {
1020 if (!atomic_read(&dst->__refcnt)) {
1021 *pprev = dst->next;
1022 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 } else {
1024 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001025 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 }
1027 }
1028
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001029 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001030
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001031 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032}
1033
David S. Miller1e493d12008-09-10 17:27:15 -07001034static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1035 void *arg)
1036{
1037 struct dst_entry *dst, **pprev;
1038
1039 spin_lock_bh(&icmp6_dst_lock);
1040 pprev = &icmp6_dst_gc_list;
1041 while ((dst = *pprev) != NULL) {
1042 struct rt6_info *rt = (struct rt6_info *) dst;
1043 if (func(rt, arg)) {
1044 *pprev = dst->next;
1045 dst_free(dst);
1046 } else {
1047 pprev = &dst->next;
1048 }
1049 }
1050 spin_unlock_bh(&icmp6_dst_lock);
1051}
1052
Daniel Lezcano569d3642008-01-18 03:56:57 -08001053static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001056 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001057 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1058 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1059 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1060 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1061 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062
Daniel Lezcano7019b782008-03-04 13:50:14 -08001063 if (time_after(rt_last_gc + rt_min_interval, now) &&
1064 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 goto out;
1066
Benjamin Thery6891a342008-03-04 13:49:47 -08001067 net->ipv6.ip6_rt_gc_expire++;
1068 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1069 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001070 if (atomic_read(&ops->entries) < ops->gc_thresh)
1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1074 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075}
1076
1077/* Clean host part of a prefix. Not necessary in radix tree,
1078 but results in cleaner routing tables.
1079
1080 Remove it only when all the things will work!
1081 */
1082
1083static int ipv6_get_mtu(struct net_device *dev)
1084{
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1087
1088 idev = in6_dev_get(dev);
1089 if (idev) {
1090 mtu = idev->cnf.mtu6;
1091 in6_dev_put(idev);
1092 }
1093 return mtu;
1094}
1095
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001096int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1099 if (hoplimit < 0) {
1100 struct net_device *dev = dst->dev;
1101 struct inet6_dev *idev = in6_dev_get(dev);
1102 if (idev) {
1103 hoplimit = idev->cnf.hop_limit;
1104 in6_dev_put(idev);
1105 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001106 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 }
1108 return hoplimit;
1109}
1110
1111/*
1112 *
1113 */
1114
Thomas Graf86872cb2006-08-22 00:01:08 -07001115int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116{
1117 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001118 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 struct rt6_info *rt = NULL;
1120 struct net_device *dev = NULL;
1121 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001122 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 int addr_type;
1124
Thomas Graf86872cb2006-08-22 00:01:08 -07001125 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 return -EINVAL;
1127#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001128 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 return -EINVAL;
1130#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001131 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001133 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 if (!dev)
1135 goto out;
1136 idev = in6_dev_get(dev);
1137 if (!idev)
1138 goto out;
1139 }
1140
Thomas Graf86872cb2006-08-22 00:01:08 -07001141 if (cfg->fc_metric == 0)
1142 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143
Daniel Lezcano55786892008-03-04 13:47:47 -08001144 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001145 if (table == NULL) {
1146 err = -ENOBUFS;
1147 goto out;
1148 }
1149
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001150 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151
1152 if (rt == NULL) {
1153 err = -ENOMEM;
1154 goto out;
1155 }
1156
1157 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001158 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1159 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1160 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161
Thomas Graf86872cb2006-08-22 00:01:08 -07001162 if (cfg->fc_protocol == RTPROT_UNSPEC)
1163 cfg->fc_protocol = RTPROT_BOOT;
1164 rt->rt6i_protocol = cfg->fc_protocol;
1165
1166 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
1168 if (addr_type & IPV6_ADDR_MULTICAST)
1169 rt->u.dst.input = ip6_mc_input;
1170 else
1171 rt->u.dst.input = ip6_forward;
1172
1173 rt->u.dst.output = ip6_output;
1174
Thomas Graf86872cb2006-08-22 00:01:08 -07001175 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1176 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 if (rt->rt6i_dst.plen == 128)
1178 rt->u.dst.flags = DST_HOST;
1179
1180#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001181 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1182 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183#endif
1184
Thomas Graf86872cb2006-08-22 00:01:08 -07001185 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186
1187 /* We cannot add true routes via loopback here,
1188 they would result in kernel looping; promote them to reject routes
1189 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001190 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1192 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001193 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 if (dev) {
1195 dev_put(dev);
1196 in6_dev_put(idev);
1197 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001198 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 dev_hold(dev);
1200 idev = in6_dev_get(dev);
1201 if (!idev) {
1202 err = -ENODEV;
1203 goto out;
1204 }
1205 }
1206 rt->u.dst.output = ip6_pkt_discard_out;
1207 rt->u.dst.input = ip6_pkt_discard;
1208 rt->u.dst.error = -ENETUNREACH;
1209 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1210 goto install_route;
1211 }
1212
Thomas Graf86872cb2006-08-22 00:01:08 -07001213 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 struct in6_addr *gw_addr;
1215 int gwa_type;
1216
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 gw_addr = &cfg->fc_gateway;
1218 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 gwa_type = ipv6_addr_type(gw_addr);
1220
1221 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1222 struct rt6_info *grt;
1223
1224 /* IPv6 strictly inhibits using not link-local
1225 addresses as nexthop address.
1226 Otherwise, router will not able to send redirects.
1227 It is very good, but in some (rare!) circumstances
1228 (SIT, PtP, NBMA NOARP links) it is handy to allow
1229 some exceptions. --ANK
1230 */
1231 err = -EINVAL;
1232 if (!(gwa_type&IPV6_ADDR_UNICAST))
1233 goto out;
1234
Daniel Lezcano55786892008-03-04 13:47:47 -08001235 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236
1237 err = -EHOSTUNREACH;
1238 if (grt == NULL)
1239 goto out;
1240 if (dev) {
1241 if (dev != grt->rt6i_dev) {
1242 dst_release(&grt->u.dst);
1243 goto out;
1244 }
1245 } else {
1246 dev = grt->rt6i_dev;
1247 idev = grt->rt6i_idev;
1248 dev_hold(dev);
1249 in6_dev_hold(grt->rt6i_idev);
1250 }
1251 if (!(grt->rt6i_flags&RTF_GATEWAY))
1252 err = 0;
1253 dst_release(&grt->u.dst);
1254
1255 if (err)
1256 goto out;
1257 }
1258 err = -EINVAL;
1259 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1260 goto out;
1261 }
1262
1263 err = -ENODEV;
1264 if (dev == NULL)
1265 goto out;
1266
Thomas Graf86872cb2006-08-22 00:01:08 -07001267 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1269 if (IS_ERR(rt->rt6i_nexthop)) {
1270 err = PTR_ERR(rt->rt6i_nexthop);
1271 rt->rt6i_nexthop = NULL;
1272 goto out;
1273 }
1274 }
1275
Thomas Graf86872cb2006-08-22 00:01:08 -07001276 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277
1278install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001279 if (cfg->fc_mx) {
1280 struct nlattr *nla;
1281 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001284 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001285
1286 if (type) {
1287 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 err = -EINVAL;
1289 goto out;
1290 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001291
1292 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 }
1295 }
1296
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001297 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001299 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001301 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001302 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 rt->u.dst.dev = dev;
1304 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001305 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001306
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001307 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001308
Thomas Graf86872cb2006-08-22 00:01:08 -07001309 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310
1311out:
1312 if (dev)
1313 dev_put(dev);
1314 if (idev)
1315 in6_dev_put(idev);
1316 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001317 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 return err;
1319}
1320
Thomas Graf86872cb2006-08-22 00:01:08 -07001321static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322{
1323 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001324 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001325 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001327 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001328 return -ENOENT;
1329
Thomas Grafc71099a2006-08-04 23:20:06 -07001330 table = rt->rt6i_table;
1331 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
Thomas Graf86872cb2006-08-22 00:01:08 -07001333 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 dst_release(&rt->u.dst);
1335
Thomas Grafc71099a2006-08-04 23:20:06 -07001336 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
1338 return err;
1339}
1340
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001341int ip6_del_rt(struct rt6_info *rt)
1342{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001343 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001344 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001345 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001346 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001347}
1348
Thomas Graf86872cb2006-08-22 00:01:08 -07001349static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350{
Thomas Grafc71099a2006-08-04 23:20:06 -07001351 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 struct fib6_node *fn;
1353 struct rt6_info *rt;
1354 int err = -ESRCH;
1355
Daniel Lezcano55786892008-03-04 13:47:47 -08001356 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001357 if (table == NULL)
1358 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359
Thomas Grafc71099a2006-08-04 23:20:06 -07001360 read_lock_bh(&table->tb6_lock);
1361
1362 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001363 &cfg->fc_dst, cfg->fc_dst_len,
1364 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001365
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001367 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001368 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001372 if (cfg->fc_flags & RTF_GATEWAY &&
1373 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001375 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 continue;
1377 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001378 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379
Thomas Graf86872cb2006-08-22 00:01:08 -07001380 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 }
1382 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001383 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384
1385 return err;
1386}
1387
1388/*
1389 * Handle redirects
1390 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001391struct ip6rd_flowi {
1392 struct flowi fl;
1393 struct in6_addr gateway;
1394};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001396static struct rt6_info *__ip6_route_redirect(struct net *net,
1397 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001398 struct flowi *fl,
1399 int flags)
1400{
1401 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1402 struct rt6_info *rt;
1403 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001404
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001406 * Get the "current" route for this destination and
1407 * check if the redirect has come from approriate router.
1408 *
1409 * RFC 2461 specifies that redirects should only be
1410 * accepted if they come from the nexthop to the target.
1411 * Due to the way the routes are chosen, this notion
1412 * is a bit fuzzy and one might need to check all possible
1413 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
Thomas Grafc71099a2006-08-04 23:20:06 -07001416 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001417 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001418restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001419 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001420 /*
1421 * Current route is on-link; redirect is always invalid.
1422 *
1423 * Seems, previous statement is not true. It could
1424 * be node, which looks for us as on-link (f.e. proxy ndisc)
1425 * But then router serving it might decide, that we should
1426 * know truth 8)8) --ANK (980726).
1427 */
1428 if (rt6_check_expired(rt))
1429 continue;
1430 if (!(rt->rt6i_flags & RTF_GATEWAY))
1431 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001432 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001433 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001434 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001435 continue;
1436 break;
1437 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001438
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001439 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001440 rt = net->ipv6.ip6_null_entry;
1441 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001442out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001443 dst_hold(&rt->u.dst);
1444
1445 read_unlock_bh(&table->tb6_lock);
1446
1447 return rt;
1448};
1449
1450static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1451 struct in6_addr *src,
1452 struct in6_addr *gateway,
1453 struct net_device *dev)
1454{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001455 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001456 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001457 struct ip6rd_flowi rdfl = {
1458 .fl = {
1459 .oif = dev->ifindex,
1460 .nl_u = {
1461 .ip6_u = {
1462 .daddr = *dest,
1463 .saddr = *src,
1464 },
1465 },
1466 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001467 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001468
Brian Haley86c36ce2009-10-07 13:58:01 -07001469 ipv6_addr_copy(&rdfl.gateway, gateway);
1470
Thomas Grafadaa70b2006-10-13 15:01:03 -07001471 if (rt6_need_strict(dest))
1472 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001473
Daniel Lezcano55786892008-03-04 13:47:47 -08001474 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001475 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001476}
1477
1478void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1479 struct in6_addr *saddr,
1480 struct neighbour *neigh, u8 *lladdr, int on_link)
1481{
1482 struct rt6_info *rt, *nrt = NULL;
1483 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001484 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001485
1486 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1487
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001488 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 if (net_ratelimit())
1490 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1491 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001492 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 }
1494
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 /*
1496 * We have finally decided to accept it.
1497 */
1498
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001499 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1501 NEIGH_UPDATE_F_OVERRIDE|
1502 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1503 NEIGH_UPDATE_F_ISROUTER))
1504 );
1505
1506 /*
1507 * Redirect received -> path was valid.
1508 * Look, redirects are sent only in response to data packets,
1509 * so that this nexthop apparently is reachable. --ANK
1510 */
1511 dst_confirm(&rt->u.dst);
1512
1513 /* Duplicate redirect: silently ignore. */
1514 if (neigh == rt->u.dst.neighbour)
1515 goto out;
1516
1517 nrt = ip6_rt_copy(rt);
1518 if (nrt == NULL)
1519 goto out;
1520
1521 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1522 if (on_link)
1523 nrt->rt6i_flags &= ~RTF_GATEWAY;
1524
1525 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1526 nrt->rt6i_dst.plen = 128;
1527 nrt->u.dst.flags |= DST_HOST;
1528
1529 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1530 nrt->rt6i_nexthop = neigh_clone(neigh);
1531 /* Reset pmtu, it may be better */
1532 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001533 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001534 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
Thomas Graf40e22e82006-08-22 00:00:45 -07001536 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 goto out;
1538
Tom Tucker8d717402006-07-30 20:43:36 -07001539 netevent.old = &rt->u.dst;
1540 netevent.new = &nrt->u.dst;
1541 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1542
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001544 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 return;
1546 }
1547
1548out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001549 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 return;
1551}
1552
1553/*
1554 * Handle ICMP "packet too big" messages
1555 * i.e. Path MTU discovery
1556 */
1557
1558void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1559 struct net_device *dev, u32 pmtu)
1560{
1561 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001562 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 int allfrag = 0;
1564
Daniel Lezcano55786892008-03-04 13:47:47 -08001565 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 if (rt == NULL)
1567 return;
1568
1569 if (pmtu >= dst_mtu(&rt->u.dst))
1570 goto out;
1571
1572 if (pmtu < IPV6_MIN_MTU) {
1573 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001574 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 * MTU (1280) and a fragment header should always be included
1576 * after a node receiving Too Big message reporting PMTU is
1577 * less than the IPv6 Minimum Link MTU.
1578 */
1579 pmtu = IPV6_MIN_MTU;
1580 allfrag = 1;
1581 }
1582
1583 /* New mtu received -> path was valid.
1584 They are sent only in response to data packets,
1585 so that this nexthop apparently is reachable. --ANK
1586 */
1587 dst_confirm(&rt->u.dst);
1588
1589 /* Host route. If it is static, it would be better
1590 not to override it, but add new one, so that
1591 when cache entry will expire old pmtu
1592 would return automatically.
1593 */
1594 if (rt->rt6i_flags & RTF_CACHE) {
1595 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1596 if (allfrag)
1597 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001598 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1600 goto out;
1601 }
1602
1603 /* Network route.
1604 Two cases are possible:
1605 1. It is connected route. Action: COW
1606 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1607 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001608 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001609 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001610 else
1611 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001612
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001613 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001614 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1615 if (allfrag)
1616 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1617
1618 /* According to RFC 1981, detecting PMTU increase shouldn't be
1619 * happened within 5 mins, the recommended timer is 10 mins.
1620 * Here this route expiration time is set to ip6_rt_mtu_expires
1621 * which is 10 mins. After 10 mins the decreased pmtu is expired
1622 * and detecting PMTU increase will be automatically happened.
1623 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001624 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001625 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1626
Thomas Graf40e22e82006-08-22 00:00:45 -07001627 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629out:
1630 dst_release(&rt->u.dst);
1631}
1632
1633/*
1634 * Misc support functions
1635 */
1636
1637static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1638{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001639 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001640 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641
1642 if (rt) {
1643 rt->u.dst.input = ort->u.dst.input;
1644 rt->u.dst.output = ort->u.dst.output;
1645
1646 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001647 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 rt->u.dst.dev = ort->u.dst.dev;
1649 if (rt->u.dst.dev)
1650 dev_hold(rt->u.dst.dev);
1651 rt->rt6i_idev = ort->rt6i_idev;
1652 if (rt->rt6i_idev)
1653 in6_dev_hold(rt->rt6i_idev);
1654 rt->u.dst.lastuse = jiffies;
1655 rt->rt6i_expires = 0;
1656
1657 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1658 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1659 rt->rt6i_metric = 0;
1660
1661 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1662#ifdef CONFIG_IPV6_SUBTREES
1663 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1664#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001665 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 }
1667 return rt;
1668}
1669
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001670#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001671static struct rt6_info *rt6_get_route_info(struct net *net,
1672 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001673 struct in6_addr *gwaddr, int ifindex)
1674{
1675 struct fib6_node *fn;
1676 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001677 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001678
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001679 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001680 if (table == NULL)
1681 return NULL;
1682
1683 write_lock_bh(&table->tb6_lock);
1684 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001685 if (!fn)
1686 goto out;
1687
Eric Dumazet7cc48262007-02-09 16:22:57 -08001688 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001689 if (rt->rt6i_dev->ifindex != ifindex)
1690 continue;
1691 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1692 continue;
1693 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1694 continue;
1695 dst_hold(&rt->u.dst);
1696 break;
1697 }
1698out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001699 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001700 return rt;
1701}
1702
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001703static struct rt6_info *rt6_add_route_info(struct net *net,
1704 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001705 struct in6_addr *gwaddr, int ifindex,
1706 unsigned pref)
1707{
Thomas Graf86872cb2006-08-22 00:01:08 -07001708 struct fib6_config cfg = {
1709 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001710 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001711 .fc_ifindex = ifindex,
1712 .fc_dst_len = prefixlen,
1713 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1714 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001715 .fc_nlinfo.pid = 0,
1716 .fc_nlinfo.nlh = NULL,
1717 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001718 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001719
Thomas Graf86872cb2006-08-22 00:01:08 -07001720 ipv6_addr_copy(&cfg.fc_dst, prefix);
1721 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1722
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001723 /* We should treat it as a default route if prefix length is 0. */
1724 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001725 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001726
Thomas Graf86872cb2006-08-22 00:01:08 -07001727 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001728
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001729 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001730}
1731#endif
1732
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001734{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001736 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001738 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001739 if (table == NULL)
1740 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
Thomas Grafc71099a2006-08-04 23:20:06 -07001742 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001743 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001745 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1747 break;
1748 }
1749 if (rt)
1750 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001751 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 return rt;
1753}
1754
1755struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001756 struct net_device *dev,
1757 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758{
Thomas Graf86872cb2006-08-22 00:01:08 -07001759 struct fib6_config cfg = {
1760 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001761 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001762 .fc_ifindex = dev->ifindex,
1763 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1764 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001765 .fc_nlinfo.pid = 0,
1766 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001767 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001768 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
Thomas Graf86872cb2006-08-22 00:01:08 -07001770 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771
Thomas Graf86872cb2006-08-22 00:01:08 -07001772 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 return rt6_get_dflt_router(gwaddr, dev);
1775}
1776
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001777void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778{
1779 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001780 struct fib6_table *table;
1781
1782 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001783 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001784 if (table == NULL)
1785 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786
1787restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001788 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001789 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1791 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001792 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001793 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 goto restart;
1795 }
1796 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001797 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798}
1799
Daniel Lezcano55786892008-03-04 13:47:47 -08001800static void rtmsg_to_fib6_config(struct net *net,
1801 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001802 struct fib6_config *cfg)
1803{
1804 memset(cfg, 0, sizeof(*cfg));
1805
1806 cfg->fc_table = RT6_TABLE_MAIN;
1807 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1808 cfg->fc_metric = rtmsg->rtmsg_metric;
1809 cfg->fc_expires = rtmsg->rtmsg_info;
1810 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1811 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1812 cfg->fc_flags = rtmsg->rtmsg_flags;
1813
Daniel Lezcano55786892008-03-04 13:47:47 -08001814 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001815
Thomas Graf86872cb2006-08-22 00:01:08 -07001816 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1817 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1818 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1819}
1820
Daniel Lezcano55786892008-03-04 13:47:47 -08001821int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822{
Thomas Graf86872cb2006-08-22 00:01:08 -07001823 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 struct in6_rtmsg rtmsg;
1825 int err;
1826
1827 switch(cmd) {
1828 case SIOCADDRT: /* Add a route */
1829 case SIOCDELRT: /* Delete a route */
1830 if (!capable(CAP_NET_ADMIN))
1831 return -EPERM;
1832 err = copy_from_user(&rtmsg, arg,
1833 sizeof(struct in6_rtmsg));
1834 if (err)
1835 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001836
Daniel Lezcano55786892008-03-04 13:47:47 -08001837 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001838
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839 rtnl_lock();
1840 switch (cmd) {
1841 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001842 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 break;
1844 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001845 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 break;
1847 default:
1848 err = -EINVAL;
1849 }
1850 rtnl_unlock();
1851
1852 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001853 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
1855 return -EINVAL;
1856}
1857
1858/*
1859 * Drop the packet on the floor
1860 */
1861
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001862static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001864 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001865 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001866 switch (ipstats_mib_noroutes) {
1867 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001868 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001869 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001870 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1871 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001872 break;
1873 }
1874 /* FALLTHROUGH */
1875 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1877 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001878 break;
1879 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001880 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 kfree_skb(skb);
1882 return 0;
1883}
1884
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001885static int ip6_pkt_discard(struct sk_buff *skb)
1886{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001887 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001888}
1889
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001890static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891{
Eric Dumazetadf30902009-06-02 05:19:30 +00001892 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894}
1895
David S. Miller6723ab52006-10-18 21:20:57 -07001896#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1897
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001898static int ip6_pkt_prohibit(struct sk_buff *skb)
1899{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001900 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001901}
1902
1903static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1904{
Eric Dumazetadf30902009-06-02 05:19:30 +00001905 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001907}
1908
David S. Miller6723ab52006-10-18 21:20:57 -07001909#endif
1910
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911/*
1912 * Allocate a dst for local (unicast / anycast) address.
1913 */
1914
1915struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1916 const struct in6_addr *addr,
1917 int anycast)
1918{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001919 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001920 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001921 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922
1923 if (rt == NULL)
1924 return ERR_PTR(-ENOMEM);
1925
Daniel Lezcano55786892008-03-04 13:47:47 -08001926 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 in6_dev_hold(idev);
1928
1929 rt->u.dst.flags = DST_HOST;
1930 rt->u.dst.input = ip6_input;
1931 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001932 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 rt->rt6i_idev = idev;
1934 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001935 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1937 rt->u.dst.obsolete = -1;
1938
1939 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001940 if (anycast)
1941 rt->rt6i_flags |= RTF_ANYCAST;
1942 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001944 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1945 if (IS_ERR(neigh)) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001946 dst_free(&rt->u.dst);
David S. Miller14deae42009-01-04 16:04:39 -08001947
1948 /* We are casting this because that is the return
1949 * value type. But an errno encoded pointer is the
1950 * same regardless of the underlying pointer type,
1951 * and that's what we are returning. So this is OK.
1952 */
1953 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 }
David S. Miller14deae42009-01-04 16:04:39 -08001955 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
1957 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1958 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001959 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
1961 atomic_set(&rt->u.dst.__refcnt, 1);
1962
1963 return rt;
1964}
1965
/* Argument bundle passed by rt6_ifdown() to the fib6_ifdown() callback. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
1970
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971static int fib6_ifdown(struct rt6_info *rt, void *arg)
1972{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001973 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1974 struct net *net = ((struct arg_dev_net *)arg)->net;
1975
1976 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1977 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 RT6_TRACE("deleted by ifdown %p\n", rt);
1979 return -1;
1980 }
1981 return 0;
1982}
1983
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001984void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001986 struct arg_dev_net adn = {
1987 .dev = dev,
1988 .net = net,
1989 };
1990
1991 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001992 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993}
1994
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2000
2001static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2002{
2003 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2004 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002005 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006
2007 /* In IPv6 pmtu discovery is not optional,
2008 so that RTAX_MTU lock cannot disable it.
2009 We still use this lock to block changes
2010 caused by addrconf/ndisc.
2011 */
2012
2013 idev = __in6_dev_get(arg->dev);
2014 if (idev == NULL)
2015 return 0;
2016
2017 /* For administrative MTU increase, there is no way to discover
2018 IPv6 PMTU increase, so PMTU increase should be updated here.
2019 Since RFC 1981 doesn't include administrative MTU increase
2020 update PMTU increase is a MUST. (i.e. jumbo frame)
2021 */
2022 /*
2023 If new MTU is less than route PMTU, this new MTU will be the
2024 lowest MTU in the path, update the route PMTU to reflect PMTU
2025 decreases; if new MTU is greater than route PMTU, and the
2026 old MTU is the lowest MTU in the path, update the route PMTU
2027 to reflect the increase. In this case if the other nodes' MTU
2028 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2029 PMTU discouvery.
2030 */
2031 if (rt->rt6i_dev == arg->dev &&
2032 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08002033 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002034 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07002035 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002037 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002038 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 return 0;
2040}
2041
2042void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2043{
Thomas Grafc71099a2006-08-04 23:20:06 -07002044 struct rt6_mtu_change_arg arg = {
2045 .dev = dev,
2046 .mtu = mtu,
2047 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002049 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050}
2051
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002052static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002053 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002054 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002055 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002056 [RTA_PRIORITY] = { .type = NLA_U32 },
2057 [RTA_METRICS] = { .type = NLA_NESTED },
2058};
2059
2060static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2061 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062{
Thomas Graf86872cb2006-08-22 00:01:08 -07002063 struct rtmsg *rtm;
2064 struct nlattr *tb[RTA_MAX+1];
2065 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066
Thomas Graf86872cb2006-08-22 00:01:08 -07002067 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2068 if (err < 0)
2069 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
Thomas Graf86872cb2006-08-22 00:01:08 -07002071 err = -EINVAL;
2072 rtm = nlmsg_data(nlh);
2073 memset(cfg, 0, sizeof(*cfg));
2074
2075 cfg->fc_table = rtm->rtm_table;
2076 cfg->fc_dst_len = rtm->rtm_dst_len;
2077 cfg->fc_src_len = rtm->rtm_src_len;
2078 cfg->fc_flags = RTF_UP;
2079 cfg->fc_protocol = rtm->rtm_protocol;
2080
2081 if (rtm->rtm_type == RTN_UNREACHABLE)
2082 cfg->fc_flags |= RTF_REJECT;
2083
2084 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2085 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002086 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002087
2088 if (tb[RTA_GATEWAY]) {
2089 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2090 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002092
2093 if (tb[RTA_DST]) {
2094 int plen = (rtm->rtm_dst_len + 7) >> 3;
2095
2096 if (nla_len(tb[RTA_DST]) < plen)
2097 goto errout;
2098
2099 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002101
2102 if (tb[RTA_SRC]) {
2103 int plen = (rtm->rtm_src_len + 7) >> 3;
2104
2105 if (nla_len(tb[RTA_SRC]) < plen)
2106 goto errout;
2107
2108 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002110
2111 if (tb[RTA_OIF])
2112 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2113
2114 if (tb[RTA_PRIORITY])
2115 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2116
2117 if (tb[RTA_METRICS]) {
2118 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2119 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002121
2122 if (tb[RTA_TABLE])
2123 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2124
2125 err = 0;
2126errout:
2127 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128}
2129
Thomas Grafc127ea22007-03-22 11:58:32 -07002130static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131{
Thomas Graf86872cb2006-08-22 00:01:08 -07002132 struct fib6_config cfg;
2133 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134
Thomas Graf86872cb2006-08-22 00:01:08 -07002135 err = rtm_to_fib6_config(skb, nlh, &cfg);
2136 if (err < 0)
2137 return err;
2138
2139 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140}
2141
Thomas Grafc127ea22007-03-22 11:58:32 -07002142static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143{
Thomas Graf86872cb2006-08-22 00:01:08 -07002144 struct fib6_config cfg;
2145 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146
Thomas Graf86872cb2006-08-22 00:01:08 -07002147 err = rtm_to_fib6_config(skb, nlh, &cfg);
2148 if (err < 0)
2149 return err;
2150
2151 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152}
2153
Thomas Graf339bf982006-11-10 14:10:15 -08002154static inline size_t rt6_nlmsg_size(void)
2155{
2156 return NLMSG_ALIGN(sizeof(struct rtmsg))
2157 + nla_total_size(16) /* RTA_SRC */
2158 + nla_total_size(16) /* RTA_DST */
2159 + nla_total_size(16) /* RTA_GATEWAY */
2160 + nla_total_size(16) /* RTA_PREFSRC */
2161 + nla_total_size(4) /* RTA_TABLE */
2162 + nla_total_size(4) /* RTA_IIF */
2163 + nla_total_size(4) /* RTA_OIF */
2164 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002165 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002166 + nla_total_size(sizeof(struct rta_cacheinfo));
2167}
2168
Brian Haley191cd582008-08-14 15:33:21 -07002169static int rt6_fill_node(struct net *net,
2170 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002171 struct in6_addr *dst, struct in6_addr *src,
2172 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002173 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174{
2175 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002176 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002177 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002178 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179
2180 if (prefix) { /* user wants prefix routes only */
2181 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2182 /* success since this is not a prefix route */
2183 return 1;
2184 }
2185 }
2186
Thomas Graf2d7202b2006-08-22 00:01:27 -07002187 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2188 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002189 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002190
2191 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 rtm->rtm_family = AF_INET6;
2193 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2194 rtm->rtm_src_len = rt->rt6i_src.plen;
2195 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002196 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002197 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002198 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002199 table = RT6_TABLE_UNSPEC;
2200 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002201 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 if (rt->rt6i_flags&RTF_REJECT)
2203 rtm->rtm_type = RTN_UNREACHABLE;
2204 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2205 rtm->rtm_type = RTN_LOCAL;
2206 else
2207 rtm->rtm_type = RTN_UNICAST;
2208 rtm->rtm_flags = 0;
2209 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2210 rtm->rtm_protocol = rt->rt6i_protocol;
2211 if (rt->rt6i_flags&RTF_DYNAMIC)
2212 rtm->rtm_protocol = RTPROT_REDIRECT;
2213 else if (rt->rt6i_flags & RTF_ADDRCONF)
2214 rtm->rtm_protocol = RTPROT_KERNEL;
2215 else if (rt->rt6i_flags&RTF_DEFAULT)
2216 rtm->rtm_protocol = RTPROT_RA;
2217
2218 if (rt->rt6i_flags&RTF_CACHE)
2219 rtm->rtm_flags |= RTM_F_CLONED;
2220
2221 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002222 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002223 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002225 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226#ifdef CONFIG_IPV6_SUBTREES
2227 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002228 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002229 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002231 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002233 if (iif) {
2234#ifdef CONFIG_IPV6_MROUTE
2235 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002236 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002237 if (err <= 0) {
2238 if (!nowait) {
2239 if (err == 0)
2240 return 0;
2241 goto nla_put_failure;
2242 } else {
2243 if (err == -EMSGSIZE)
2244 goto nla_put_failure;
2245 }
2246 }
2247 } else
2248#endif
2249 NLA_PUT_U32(skb, RTA_IIF, iif);
2250 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002251 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002253 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002254 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002255 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002257
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002259 goto nla_put_failure;
2260
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002262 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2263
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2266
2267 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002268
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002269 if (!(rt->rt6i_flags & RTF_EXPIRES))
2270 expires = 0;
2271 else if (rt->rt6i_expires - jiffies < INT_MAX)
2272 expires = rt->rt6i_expires - jiffies;
2273 else
2274 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002275
Thomas Grafe3703b32006-11-27 09:27:07 -08002276 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2277 expires, rt->u.dst.error) < 0)
2278 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279
Thomas Graf2d7202b2006-08-22 00:01:27 -07002280 return nlmsg_end(skb, nlh);
2281
2282nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002283 nlmsg_cancel(skb, nlh);
2284 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285}
2286
Patrick McHardy1b43af52006-08-10 23:11:17 -07002287int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288{
2289 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2290 int prefix;
2291
Thomas Graf2d7202b2006-08-22 00:01:27 -07002292 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2293 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2295 } else
2296 prefix = 0;
2297
Brian Haley191cd582008-08-14 15:33:21 -07002298 return rt6_fill_node(arg->net,
2299 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002301 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302}
2303
Thomas Grafc127ea22007-03-22 11:58:32 -07002304static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002306 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002307 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002309 struct sk_buff *skb;
2310 struct rtmsg *rtm;
2311 struct flowi fl;
2312 int err, iif = 0;
2313
2314 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2315 if (err < 0)
2316 goto errout;
2317
2318 err = -EINVAL;
2319 memset(&fl, 0, sizeof(fl));
2320
2321 if (tb[RTA_SRC]) {
2322 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2323 goto errout;
2324
2325 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2326 }
2327
2328 if (tb[RTA_DST]) {
2329 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2330 goto errout;
2331
2332 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2333 }
2334
2335 if (tb[RTA_IIF])
2336 iif = nla_get_u32(tb[RTA_IIF]);
2337
2338 if (tb[RTA_OIF])
2339 fl.oif = nla_get_u32(tb[RTA_OIF]);
2340
2341 if (iif) {
2342 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002343 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002344 if (!dev) {
2345 err = -ENODEV;
2346 goto errout;
2347 }
2348 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349
2350 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002351 if (skb == NULL) {
2352 err = -ENOBUFS;
2353 goto errout;
2354 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355
2356 /* Reserve room for dummy headers, this skb can pass
2357 through good chunk of routing engine.
2358 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002359 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2361
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002362 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Eric Dumazetadf30902009-06-02 05:19:30 +00002363 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364
Brian Haley191cd582008-08-14 15:33:21 -07002365 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002367 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002369 kfree_skb(skb);
2370 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 }
2372
Daniel Lezcano55786892008-03-04 13:47:47 -08002373 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002374errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376}
2377
Thomas Graf86872cb2006-08-22 00:01:08 -07002378void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379{
2380 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002381 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002382 u32 seq;
2383 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002385 err = -ENOBUFS;
2386 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002387
Thomas Graf339bf982006-11-10 14:10:15 -08002388 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002389 if (skb == NULL)
2390 goto errout;
2391
Brian Haley191cd582008-08-14 15:33:21 -07002392 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002393 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002394 if (err < 0) {
2395 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2396 WARN_ON(err == -EMSGSIZE);
2397 kfree_skb(skb);
2398 goto errout;
2399 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002400 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2401 info->nlh, gfp_any());
2402 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002403errout:
2404 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002405 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406}
2407
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002408static int ip6_route_dev_notify(struct notifier_block *this,
2409 unsigned long event, void *data)
2410{
2411 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002412 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002413
2414 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2415 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2416 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2417#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2418 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2419 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2420 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2421 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2422#endif
2423 }
2424
2425 return NOTIFY_OK;
2426}
2427
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428/*
2429 * /proc
2430 */
2431
2432#ifdef CONFIG_PROC_FS
2433
/*
 * NOTE(review): presumably the byte length of one /proc/net/ipv6_route
 * line; neither this macro nor struct rt6_proc_arg is referenced by the
 * seq_file based code visible below - confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2444
2445static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2446{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002447 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002449 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450
2451#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002452 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002454 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455#endif
2456
2457 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002458 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002460 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002462 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2463 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2464 rt->u.dst.__use, rt->rt6i_flags,
2465 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 return 0;
2467}
2468
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002469static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002471 struct net *net = (struct net *)m->private;
2472 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002473 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474}
2475
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002476static int ipv6_route_open(struct inode *inode, struct file *file)
2477{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002478 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002479}
2480
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002481static const struct file_operations ipv6_route_proc_fops = {
2482 .owner = THIS_MODULE,
2483 .open = ipv6_route_open,
2484 .read = seq_read,
2485 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002486 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002487};
2488
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2490{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002491 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002493 net->ipv6.rt6_stats->fib_nodes,
2494 net->ipv6.rt6_stats->fib_route_nodes,
2495 net->ipv6.rt6_stats->fib_rt_alloc,
2496 net->ipv6.rt6_stats->fib_rt_entries,
2497 net->ipv6.rt6_stats->fib_rt_cache,
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002498 atomic_read(&net->ipv6.ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002499 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500
2501 return 0;
2502}
2503
2504static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2505{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002506 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002507}
2508
Arjan van de Ven9a321442007-02-12 00:55:35 -08002509static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510 .owner = THIS_MODULE,
2511 .open = rt6_stats_seq_open,
2512 .read = seq_read,
2513 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002514 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515};
2516#endif /* CONFIG_PROC_FS */
2517
2518#ifdef CONFIG_SYSCTL
2519
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002521int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 void __user *buffer, size_t *lenp, loff_t *ppos)
2523{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002524 struct net *net = current->nsproxy->net_ns;
2525 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002527 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002528 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 return 0;
2530 } else
2531 return -EINVAL;
2532}
2533
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002534ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002535 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002537 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002539 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002540 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 },
2542 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002544 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 .maxlen = sizeof(int),
2546 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002547 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 },
2549 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002550 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002551 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 .maxlen = sizeof(int),
2553 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002554 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 },
2556 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002558 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 .maxlen = sizeof(int),
2560 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002561 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 },
2563 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002565 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 .maxlen = sizeof(int),
2567 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002568 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 },
2570 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573 .maxlen = sizeof(int),
2574 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002575 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 },
2577 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002579 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580 .maxlen = sizeof(int),
2581 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002582 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 },
2584 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002586 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587 .maxlen = sizeof(int),
2588 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002589 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 },
2591 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002593 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 .maxlen = sizeof(int),
2595 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002596 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 },
2598 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002600 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601 .maxlen = sizeof(int),
2602 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002603 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002605 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606};
2607
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002608struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002609{
2610 struct ctl_table *table;
2611
2612 table = kmemdup(ipv6_route_table_template,
2613 sizeof(ipv6_route_table_template),
2614 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002615
2616 if (table) {
2617 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002618 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002619 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2620 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2621 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2622 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2623 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2624 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2625 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002626 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002627 }
2628
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002629 return table;
2630}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631#endif
2632
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002633static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002634{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002635 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002636
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002637 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2638 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002639
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002640 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2641 sizeof(*net->ipv6.ip6_null_entry),
2642 GFP_KERNEL);
2643 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002644 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002645 net->ipv6.ip6_null_entry->u.dst.path =
2646 (struct dst_entry *)net->ipv6.ip6_null_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002647 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002648
2649#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2650 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2651 sizeof(*net->ipv6.ip6_prohibit_entry),
2652 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002653 if (!net->ipv6.ip6_prohibit_entry)
2654 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002655 net->ipv6.ip6_prohibit_entry->u.dst.path =
2656 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002657 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002658
2659 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2660 sizeof(*net->ipv6.ip6_blk_hole_entry),
2661 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002662 if (!net->ipv6.ip6_blk_hole_entry)
2663 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002664 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2665 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002666 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002667#endif
2668
Peter Zijlstrab339a472008-10-07 14:15:00 -07002669 net->ipv6.sysctl.flush_delay = 0;
2670 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2671 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2672 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2673 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2674 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2675 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2676 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2677
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002678#ifdef CONFIG_PROC_FS
2679 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2680 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2681#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002682 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2683
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002684 ret = 0;
2685out:
2686 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002687
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002688#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2689out_ip6_prohibit_entry:
2690 kfree(net->ipv6.ip6_prohibit_entry);
2691out_ip6_null_entry:
2692 kfree(net->ipv6.ip6_null_entry);
2693#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002694out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002695 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002696}
2697
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002698static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002699{
2700#ifdef CONFIG_PROC_FS
2701 proc_net_remove(net, "ipv6_route");
2702 proc_net_remove(net, "rt6_stats");
2703#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002704 kfree(net->ipv6.ip6_null_entry);
2705#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2706 kfree(net->ipv6.ip6_prohibit_entry);
2707 kfree(net->ipv6.ip6_blk_hole_entry);
2708#endif
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002709}
2710
2711static struct pernet_operations ip6_route_net_ops = {
2712 .init = ip6_route_net_init,
2713 .exit = ip6_route_net_exit,
2714};
2715
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002716static struct notifier_block ip6_route_dev_notifier = {
2717 .notifier_call = ip6_route_dev_notify,
2718 .priority = 0,
2719};
2720
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002721int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002723 int ret;
2724
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002725 ret = -ENOMEM;
2726 ip6_dst_ops_template.kmem_cachep =
2727 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2728 SLAB_HWCACHE_ALIGN, NULL);
2729 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002730 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002731
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002732 ret = register_pernet_subsys(&ip6_route_net_ops);
2733 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002734 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002735
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002736 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2737
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002738 /* Registering of the loopback is done before this portion of code,
2739 * the loopback reference in rt6_info will not be taken, do it
2740 * manually for init_net */
2741 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2742 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2743 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2744 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2745 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2746 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2747 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2748 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002749 ret = fib6_init();
2750 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002751 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002752
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002753 ret = xfrm6_init();
2754 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002755 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002756
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002757 ret = fib6_rules_init();
2758 if (ret)
2759 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002760
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002761 ret = -ENOBUFS;
2762 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2763 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2764 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2765 goto fib6_rules_init;
2766
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002767 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002768 if (ret)
2769 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002770
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002771out:
2772 return ret;
2773
2774fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002775 fib6_rules_cleanup();
2776xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002777 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002778out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002779 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002780out_register_subsys:
2781 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002782out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002783 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002784 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002785}
2786
2787void ip6_route_cleanup(void)
2788{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002789 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002790 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002793 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002794 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795}