blob: c2bd74c5f8d979dee714b169750250250f1022e6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug switches for this file.
 *
 * RT6_DEBUG selects the verbosity at compile time: with a value of 3 or
 * more, RDBG() expands to a printk call and RT6_TRACE() to a KERN_DEBUG
 * printk; otherwise RDBG() expands to nothing and RT6_TRACE() to an empty
 * statement.
 */
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
/*
 * When 0, ip6_pol_route() compiles out its rt6_alloc_clone() call and
 * returns the looked-up route itself for routes that already have a
 * nexthop (or are flagged RTF_NONEXTHOP).
 */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080074#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080090static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080094static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 struct in6_addr *gwaddr, int ifindex);
97#endif
98
/*
 * dst_ops for regular IPv6 routes: wires the generic dst layer callbacks
 * (gc, check, destroy, ifdown, negative_advice, link_failure, update_pmtu,
 * local_out) to the ip6_* handlers declared above, with a garbage
 * collection threshold of 1024 and an entry counter starting at 0.
 *
 * NOTE(review): the "_template" suffix and the use of
 * net->ipv6.ip6_dst_ops in rt6_alloc_cow() suggest this is copied per
 * network namespace; the copy itself is not visible in this part of the
 * file -- confirm.
 */
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800101 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Eric Dumazete2422972008-01-30 20:07:45 -0800111 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
/*
 * update_pmtu callback used by ip6_dst_blackhole_ops: deliberately does
 * nothing, so path-MTU updates on such a dst are ignored.
 */
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
/*
 * Reduced dst_ops for "blackhole" IPv6 dsts: only destroy, check and a
 * no-op update_pmtu are provided; no gc, ifdown, negative_advice,
 * link_failure or local_out callbacks are set.
 */
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Eric Dumazete2422972008-01-30 20:07:45 -0800124 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700125};
126
/*
 * Template for the "null" route entry, i.e. the value the lookup helpers
 * in this file return (as net->ipv6.ip6_null_entry) when nothing matches.
 *
 * The entry is a reject route without a nexthop: its dst reports
 * -ENETUNREACH, packets routed through it are handed to
 * ip6_pkt_discard() / ip6_pkt_discard_out(), the hop limit metric is 255
 * and the route metric is the largest u32 value.  Both reference counts
 * start at 1.
 */
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800127static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700140 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
Thomas Graf101367c2006-08-04 03:39:02 -0700145#ifdef CONFIG_IPV6_MULTIPLE_TABLES
146
David S. Miller6723ab52006-10-18 21:20:57 -0700147static int ip6_pkt_prohibit(struct sk_buff *skb);
148static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700149
/*
 * Template for the "prohibit" route entry (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES).
 *
 * Same layout as the null entry template, but the dst reports -EACCES
 * and packets are handed to ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
Adrian Bunk280a34c2008-04-21 02:29:32 -0700150static struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700151 .u = {
152 .dst = {
153 .__refcnt = ATOMIC_INIT(1),
154 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700155 .obsolete = -1,
156 .error = -EACCES,
157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700158 .input = ip6_pkt_prohibit,
159 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700160 }
161 },
162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700163 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
/*
 * Template for the "blackhole" route entry (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES).
 *
 * Same layout as the null entry template, but the dst reports -EINVAL
 * and both the input and output handlers are the generic dst_discard().
 */
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800168static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800176 .input = dst_discard,
177 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700181 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700182 .rt6i_metric = ~(u32) 0,
183 .rt6i_ref = ATOMIC_INIT(1),
184};
185
186#endif
187
/*
 * Allocate a dst entry from @ops and hand it back viewed as an IPv6
 * route.  Whatever dst_alloc() yields is passed through unchanged, so
 * the caller must check the result before using it.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800210 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900211 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
Thomas Grafc71099a2006-08-04 23:20:06 -0700229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700233}
234
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700236 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 */
238
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900241 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700260 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900262 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900275 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 if (local)
277 return local;
278
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700279 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800280 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900282out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 return rt;
284}
285
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state and was
 * last updated more than rtr_probe_interval ago, send a neighbour
 * solicitation for it (to the address computed by
 * addrconf_addr_solict_mult()) out of the route's device.  @rt may be
 * NULL, in which case nothing is done.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The rate limit is enforced through neigh->updated, which
 * is stamped here; because that is a write to the neighbour entry, the
 * check-and-stamp is done under the writer side of neigh->lock so that
 * two concurrent callers cannot both pass the check and both probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding neigh->lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700328 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Dave Jonesb6f99a22007-03-22 12:27:49 -0700336static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800352 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800354 } else
355 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return m;
357}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
361{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900363
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700364 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700370 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 return -1;
373 return m;
374}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
David S. Millerf11e6652007-03-24 20:36:25 -0700376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378{
David S. Millerf11e6652007-03-24 20:36:25 -0700379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 return match;
417}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418
David S. Millerf11e6652007-03-24 20:36:25 -0700419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800422 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800425 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
David S. Millerf11e6652007-03-24 20:36:25 -0700427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
David S. Millerf11e6652007-03-24 20:36:25 -0700431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
David S. Millerf11e6652007-03-24 20:36:25 -0700445 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800446 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900448 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800449 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450}
451
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt points at the option (interpreted as struct route_info), @len is
 * the number of bytes available there and @gwaddr is the gateway the
 * route is to point at.
 *
 * The option is rejected with -EINVAL when it is shorter than
 * struct route_info, when its length field exceeds 3, when the prefix
 * length exceeds 128 or needs more prefix bytes than the length field
 * provides, or when the preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * Otherwise the matching route is looked up and
 *  - deleted when it exists and the lifetime is 0,
 *  - created when it does not exist and the lifetime is non-zero,
 *  - updated with the new preference when it exists;
 * a finite lifetime (re)arms the expiry, an infinite one clears it.
 *
 * Returns 0 on success.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* A full 128-bit prefix is present in the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws an existing route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
525
/*
 * BACKTRACK(__net, saddr) - continue a failed lookup further up the tree.
 *
 * Must be expanded inside a function that has
 *   - a local "struct rt6_info *rt" holding the result of the lookup at
 *     the current node,
 *   - a local "struct fib6_node *fn" naming that node, and
 *   - the labels "restart" (re-run the selection on the new fn) and
 *     "out" (give up and use rt as it is).
 *
 * If rt is the null entry of @__net, the macro climbs from fn towards
 * the root.  At each parent that has a subtree other than the one we
 * came from, the subtree is searched for @saddr with fib6_lookup();
 * otherwise the parent itself becomes fn.  The first node carrying
 * RTN_RTINFO triggers "goto restart"; reaching a node flagged
 * RTN_TL_ROOT triggers "goto out".  When rt is not the null entry the
 * macro does nothing.
 *
 * Both arguments are parenthesised so that arbitrary expressions can be
 * passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700543
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800556 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800558 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700560 return rt;
561
562}
563
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 },
573 },
574 };
575 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577
Thomas Grafadaa70b2006-10-13 15:01:03 -0700578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return NULL;
590}
591
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900592EXPORT_SYMBOL(rt6_lookup);
593
Thomas Grafc71099a2006-08-04 23:20:06 -0700594/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
Thomas Graf86872cb2006-08-22 00:01:08 -0700600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700603 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700607 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
610 return err;
611}
612
Thomas Graf40e22e82006-08-22 00:00:45 -0700613int ip6_ins_rt(struct rt6_info *rt)
614{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800615 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900616 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800617 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800618 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700619}
620
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900641 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
David S. Miller14deae42009-01-04 16:04:39 -0800655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800685 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800687 return rt;
688}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
/*
 * Full route resolution for flow @fl in @table, with interface @oif.
 *
 * Outline:
 *  1. Under the read side of table->tb6_lock, find the node for the flow
 *     and let rt6_select() pick a route, backtracking towards the root
 *     (BACKTRACK) while the result is the null entry.  Unless forwarding
 *     is enabled in devconf_all, the first pass additionally demands
 *     RT6_LOOKUP_F_REACHABLE.
 *  2. If the result is the null entry or already a cache entry
 *     (RTF_CACHE), control goes to "out": when the reachability demand is
 *     still set it is dropped and the lookup is repeated from
 *     "restart_2"; otherwise the route is held, the lock released and the
 *     route returned.
 *  3. Otherwise the route is held and the lock released.  A route with
 *     neither a nexthop nor RTF_NONEXTHOP gets a per-destination copy via
 *     rt6_alloc_cow(); any other route is returned as is, because
 *     CLONE_OFFLINK_ROUTE is 0.
 *  4. The copy (or the null entry when copying failed) is held and, if it
 *     is a real copy, inserted with ip6_ins_rt().  If the insert fails,
 *     or no copy could be made, the whole lookup is retried -- at most 3
 *     attempts in total.
 *
 * The returned route carries a reference for the caller, and its
 * lastuse/__use fields are updated just before returning.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705{
706 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700708 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 int err;
/* Reachability is only demanded when forwarding is off in devconf_all. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
/* Of the caller's flags only RT6_LOOKUP_F_IFACE is passed to rt6_select(). */
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700713 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
/* Re-entered from "out" with the lock still held and reachable cleared. */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800718restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
/* "restart" and "out" are the jump targets used by BACKTRACK(). */
721restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700722 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800726 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800727 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
/* Pin the route before dropping the lock; the copy is made unlocked. */
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800729 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700730 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800731
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 else {
735#if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737#else
738 goto out2;
739#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800741
/* Swap the reference: release the original, hold the copy or null entry. */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800742 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800743 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800744
745 dst_hold(&rt->u.dst);
746 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700747 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800748 if (!err)
749 goto out2;
750 }
751
/* Out of retries: return whatever rt currently is (held above). */
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
/* Reached with the lock held; repeat once without the reachability demand. */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800767 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769out2:
/* Usage accounting on the route being returned. */
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
773 return rt;
774}
775
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777 struct flowi *fl, int flags)
778{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780}
781
Thomas Grafc71099a2006-08-04 23:20:06 -0700782void ip6_route_input(struct sk_buff *skb)
783{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700784 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900785 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700794 },
795 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900796 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700797 .proto = iph->nexthdr,
798 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700801 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700802
Eric Dumazetadf30902009-06-02 05:19:30 +0000803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700804}
805
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 struct flowi *fl, int flags)
808{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800809 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700810}
811
Daniel Lezcano4591db42008-03-05 10:48:10 -0800812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700818 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700819
Thomas Grafadaa70b2006-10-13 15:01:03 -0700820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
830 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700831
Daniel Lezcano4591db42008-03-05 10:48:10 -0800832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833}
834
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900835EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836
David S. Miller14e50e52007-05-24 18:17:54 -0700837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
838{
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
843
844 if (rt) {
845 new = &rt->u.dst;
846
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800849 new->input = dst_discard;
850 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700851
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
860
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
864
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866#ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868#endif
869
870 dst_free(new);
871 }
872
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
876}
877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
878
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879/*
880 * Destination cache support functions
881 */
882
883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
884{
885 struct rt6_info *rt;
886
887 rt = (struct rt6_info *) dst;
888
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
891
892 return NULL;
893}
894
895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
896{
897 struct rt6_info *rt = (struct rt6_info *) dst;
898
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700901 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 else
903 dst_release(dst);
904 }
905 return NULL;
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
913
Eric Dumazetadf30902009-06-02 05:19:30 +0000914 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937}
938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939static int ipv6_get_mtu(struct net_device *dev);
940
Daniel Lezcano55786892008-03-04 13:47:47 -0800941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
Daniel Lezcano55786892008-03-04 13:47:47 -0800945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947
948 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800959static struct dst_entry *icmp6_dst_gc_list;
960static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700961
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900964 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900968 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969
970 if (unlikely(idev == NULL))
971 return NULL;
972
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800982 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800995 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
Daniel Lezcano55786892008-03-04 13:47:47 -08001010 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001013 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014}
1015
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001016int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017{
1018 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001019 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
1021 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001022
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 } else {
1031 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001032 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 }
1034 }
1035
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001036 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001037
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001038 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039}
1040
David S. Miller1e493d12008-09-10 17:27:15 -07001041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
Daniel Lezcano569d3642008-01-18 03:56:57 -08001060static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069
Daniel Lezcano7019b782008-03-04 13:50:14 -08001070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 goto out;
1073
Benjamin Thery6891a342008-03-04 13:49:47 -08001074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082}
1083
1084/* Clean host part of a prefix. Not necessary in radix tree,
1085 but results in cleaner routing tables.
1086
1087 Remove it only when all the things will work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1094
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1099 }
1100 return mtu;
1101}
1102
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001103int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 }
1115 return hoplimit;
1116}
1117
1118/*
1119 *
1120 */
1121
Thomas Graf86872cb2006-08-22 00:01:08 -07001122int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123{
1124 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001125 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001129 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 int addr_type;
1131
Thomas Graf86872cb2006-08-22 00:01:08 -07001132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001135 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 return -EINVAL;
1137#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001138 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001140 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
Thomas Graf86872cb2006-08-22 00:01:08 -07001148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150
Daniel Lezcano55786892008-03-04 13:47:47 -08001151 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
1164 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
Thomas Graf86872cb2006-08-22 00:01:08 -07001169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1179
1180 rt->u.dst.output = ip6_output;
1181
Thomas Graf86872cb2006-08-22 00:01:08 -07001182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190#endif
1191
Thomas Graf86872cb2006-08-22 00:01:08 -07001192 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1196 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001197 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001200 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1204 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001205 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1211 }
1212 }
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1218 }
1219
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 struct in6_addr *gw_addr;
1222 int gwa_type;
1223
Thomas Graf86872cb2006-08-22 00:01:08 -07001224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 gwa_type = ipv6_addr_type(gw_addr);
1227
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1230
1231 /* IPv6 strictly inhibits using not link-local
1232 addresses as nexthop address.
1233 Otherwise, router will not able to send redirects.
1234 It is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1237 */
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1241
Daniel Lezcano55786892008-03-04 13:47:47 -08001242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1251 }
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1257 }
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1261
1262 if (err)
1263 goto out;
1264 }
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1268 }
1269
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1280 }
1281 }
1282
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
1285install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Thomas Graf86872cb2006-08-22 00:01:08 -07001290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001291 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001292
1293 if (type) {
1294 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 err = -EINVAL;
1296 goto out;
1297 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001298
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302 }
1303
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001306 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001312 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001313
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001314 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317
1318out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001324 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 return err;
1326}
1327
Thomas Graf86872cb2006-08-22 00:01:08 -07001328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329{
1330 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001331 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001332 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001334 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001335 return -ENOENT;
1336
Thomas Grafc71099a2006-08-04 23:20:06 -07001337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
Thomas Graf86872cb2006-08-22 00:01:08 -07001340 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 dst_release(&rt->u.dst);
1342
Thomas Grafc71099a2006-08-04 23:20:06 -07001343 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
1345 return err;
1346}
1347
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001348int ip6_del_rt(struct rt6_info *rt)
1349{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001350 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001351 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001352 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001353 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001354}
1355
Thomas Graf86872cb2006-08-22 00:01:08 -07001356static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357{
Thomas Grafc71099a2006-08-04 23:20:06 -07001358 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1362
Daniel Lezcano55786892008-03-04 13:47:47 -08001363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001364 if (table == NULL)
1365 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366
Thomas Grafc71099a2006-08-04 23:20:06 -07001367 read_lock_bh(&table->tb6_lock);
1368
1369 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001372
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001375 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 continue;
1384 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001385 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
Thomas Graf86872cb2006-08-22 00:01:08 -07001387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 }
1389 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001390 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391
1392 return err;
1393}
1394
1395/*
1396 * Handle redirects
1397 */
/*
 * Flow key used for redirect validation: a regular flow plus the address of
 * the router that sent the redirect.
 *
 * ip6_route_redirect() passes a pointer to this structure as a
 * struct flowi *, and __ip6_route_redirect() casts it back, so "fl" must
 * remain the first member.
 */
struct ip6rd_flowi {
	struct flowi fl;		/* generic flow key; must stay first */
	struct in6_addr gateway;	/* compared against each route's rt6i_gateway */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001405 struct flowi *fl,
1406 int flags)
1407{
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
1410 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001411
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001413 * Get the "current" route for this destination and
1414 * check if the redirect has come from approriate router.
1415 *
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
Thomas Grafc71099a2006-08-04 23:20:06 -07001423 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001425restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001427 /*
1428 * Current route is on-link; redirect is always invalid.
1429 *
1430 * Seems, previous statement is not true. It could
1431 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432 * But then router serving it might decide, that we should
1433 * know truth 8)8) --ANK (980726).
1434 */
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001439 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001440 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001442 continue;
1443 break;
1444 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001445
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001446 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001449out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001450 dst_hold(&rt->u.dst);
1451
1452 read_unlock_bh(&table->tb6_lock);
1453
1454 return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1461{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001462 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001463 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1471 },
1472 },
1473 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001474 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001475
Brian Haley86c36ce2009-10-07 13:58:01 -07001476 ipv6_addr_copy(&rdfl.gateway, gateway);
1477
Thomas Grafadaa70b2006-10-13 15:01:03 -07001478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001480
Daniel Lezcano55786892008-03-04 13:47:47 -08001481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001482 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001483}
1484
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001491 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001492
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001495 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001499 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 }
1501
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 /*
1503 * We have finally decided to accept it.
1504 */
1505
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001506 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1511 );
1512
1513 /*
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so that this nexthop apparently is reachable. --ANK
1517 */
1518 dst_confirm(&rt->u.dst);
1519
1520 /* Duplicate redirect: silently ignore. */
1521 if (neigh == rt->u.dst.neighbour)
1522 goto out;
1523
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1527
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
1534 nrt->u.dst.flags |= DST_HOST;
1535
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
1538 /* Reset pmtu, it may be better */
1539 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001540 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001541 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
Thomas Graf40e22e82006-08-22 00:00:45 -07001543 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544 goto out;
1545
Tom Tucker8d717402006-07-30 20:43:36 -07001546 netevent.old = &rt->u.dst;
1547 netevent.new = &nrt->u.dst;
1548 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001551 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 return;
1553 }
1554
1555out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001556 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 return;
1558}
1559
1560/*
1561 * Handle ICMP "packet too big" messages
1562 * i.e. Path MTU discovery
1563 */
1564
1565void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1566 struct net_device *dev, u32 pmtu)
1567{
1568 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001569 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 int allfrag = 0;
1571
Daniel Lezcano55786892008-03-04 13:47:47 -08001572 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 if (rt == NULL)
1574 return;
1575
1576 if (pmtu >= dst_mtu(&rt->u.dst))
1577 goto out;
1578
1579 if (pmtu < IPV6_MIN_MTU) {
1580 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001581 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 * MTU (1280) and a fragment header should always be included
1583 * after a node receiving Too Big message reporting PMTU is
1584 * less than the IPv6 Minimum Link MTU.
1585 */
1586 pmtu = IPV6_MIN_MTU;
1587 allfrag = 1;
1588 }
1589
1590 /* New mtu received -> path was valid.
1591 They are sent only in response to data packets,
1592 so that this nexthop apparently is reachable. --ANK
1593 */
1594 dst_confirm(&rt->u.dst);
1595
1596 /* Host route. If it is static, it would be better
1597 not to override it, but add new one, so that
1598 when cache entry will expire old pmtu
1599 would return automatically.
1600 */
1601 if (rt->rt6i_flags & RTF_CACHE) {
1602 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1603 if (allfrag)
1604 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001605 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1607 goto out;
1608 }
1609
1610 /* Network route.
1611 Two cases are possible:
1612 1. It is connected route. Action: COW
1613 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1614 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001615 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001616 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001617 else
1618 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001619
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001620 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001621 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1622 if (allfrag)
1623 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1624
1625 /* According to RFC 1981, detecting PMTU increase shouldn't be
1626 * happened within 5 mins, the recommended timer is 10 mins.
1627 * Here this route expiration time is set to ip6_rt_mtu_expires
1628 * which is 10 mins. After 10 mins the decreased pmtu is expired
1629 * and detecting PMTU increase will be automatically happened.
1630 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001631 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001632 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1633
Thomas Graf40e22e82006-08-22 00:00:45 -07001634 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636out:
1637 dst_release(&rt->u.dst);
1638}
1639
1640/*
1641 * Misc support functions
1642 */
1643
1644static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1645{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001646 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001647 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648
1649 if (rt) {
1650 rt->u.dst.input = ort->u.dst.input;
1651 rt->u.dst.output = ort->u.dst.output;
1652
1653 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001654 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 rt->u.dst.dev = ort->u.dst.dev;
1656 if (rt->u.dst.dev)
1657 dev_hold(rt->u.dst.dev);
1658 rt->rt6i_idev = ort->rt6i_idev;
1659 if (rt->rt6i_idev)
1660 in6_dev_hold(rt->rt6i_idev);
1661 rt->u.dst.lastuse = jiffies;
1662 rt->rt6i_expires = 0;
1663
1664 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1665 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1666 rt->rt6i_metric = 0;
1667
1668 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1669#ifdef CONFIG_IPV6_SUBTREES
1670 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1671#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001672 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 }
1674 return rt;
1675}
1676
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001677#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001678static struct rt6_info *rt6_get_route_info(struct net *net,
1679 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001680 struct in6_addr *gwaddr, int ifindex)
1681{
1682 struct fib6_node *fn;
1683 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001684 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001685
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001686 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001687 if (table == NULL)
1688 return NULL;
1689
1690 write_lock_bh(&table->tb6_lock);
1691 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001692 if (!fn)
1693 goto out;
1694
Eric Dumazet7cc48262007-02-09 16:22:57 -08001695 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001696 if (rt->rt6i_dev->ifindex != ifindex)
1697 continue;
1698 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1699 continue;
1700 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1701 continue;
1702 dst_hold(&rt->u.dst);
1703 break;
1704 }
1705out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001706 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001707 return rt;
1708}
1709
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001710static struct rt6_info *rt6_add_route_info(struct net *net,
1711 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001712 struct in6_addr *gwaddr, int ifindex,
1713 unsigned pref)
1714{
Thomas Graf86872cb2006-08-22 00:01:08 -07001715 struct fib6_config cfg = {
1716 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001717 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001718 .fc_ifindex = ifindex,
1719 .fc_dst_len = prefixlen,
1720 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1721 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001722 .fc_nlinfo.pid = 0,
1723 .fc_nlinfo.nlh = NULL,
1724 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001725 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001726
Thomas Graf86872cb2006-08-22 00:01:08 -07001727 ipv6_addr_copy(&cfg.fc_dst, prefix);
1728 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1729
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001730 /* We should treat it as a default route if prefix length is 0. */
1731 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001732 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001733
Thomas Graf86872cb2006-08-22 00:01:08 -07001734 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001735
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001736 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001737}
1738#endif
1739
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001741{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001743 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001745 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001746 if (table == NULL)
1747 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748
Thomas Grafc71099a2006-08-04 23:20:06 -07001749 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001750 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001752 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1754 break;
1755 }
1756 if (rt)
1757 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001758 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 return rt;
1760}
1761
1762struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001763 struct net_device *dev,
1764 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765{
Thomas Graf86872cb2006-08-22 00:01:08 -07001766 struct fib6_config cfg = {
1767 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001768 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001769 .fc_ifindex = dev->ifindex,
1770 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1771 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001772 .fc_nlinfo.pid = 0,
1773 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001774 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001775 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776
Thomas Graf86872cb2006-08-22 00:01:08 -07001777 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778
Thomas Graf86872cb2006-08-22 00:01:08 -07001779 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 return rt6_get_dflt_router(gwaddr, dev);
1782}
1783
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001784void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785{
1786 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001787 struct fib6_table *table;
1788
1789 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001790 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001791 if (table == NULL)
1792 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793
1794restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001795 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001796 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1798 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001799 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001800 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 goto restart;
1802 }
1803 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001804 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805}
1806
Daniel Lezcano55786892008-03-04 13:47:47 -08001807static void rtmsg_to_fib6_config(struct net *net,
1808 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001809 struct fib6_config *cfg)
1810{
1811 memset(cfg, 0, sizeof(*cfg));
1812
1813 cfg->fc_table = RT6_TABLE_MAIN;
1814 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1815 cfg->fc_metric = rtmsg->rtmsg_metric;
1816 cfg->fc_expires = rtmsg->rtmsg_info;
1817 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1818 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1819 cfg->fc_flags = rtmsg->rtmsg_flags;
1820
Daniel Lezcano55786892008-03-04 13:47:47 -08001821 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001822
Thomas Graf86872cb2006-08-22 00:01:08 -07001823 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1824 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1825 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1826}
1827
Daniel Lezcano55786892008-03-04 13:47:47 -08001828int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829{
Thomas Graf86872cb2006-08-22 00:01:08 -07001830 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 struct in6_rtmsg rtmsg;
1832 int err;
1833
1834 switch(cmd) {
1835 case SIOCADDRT: /* Add a route */
1836 case SIOCDELRT: /* Delete a route */
1837 if (!capable(CAP_NET_ADMIN))
1838 return -EPERM;
1839 err = copy_from_user(&rtmsg, arg,
1840 sizeof(struct in6_rtmsg));
1841 if (err)
1842 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001843
Daniel Lezcano55786892008-03-04 13:47:47 -08001844 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001845
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 rtnl_lock();
1847 switch (cmd) {
1848 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 break;
1851 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001852 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853 break;
1854 default:
1855 err = -EINVAL;
1856 }
1857 rtnl_unlock();
1858
1859 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001860 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
1862 return -EINVAL;
1863}
1864
1865/*
1866 * Drop the packet on the floor
1867 */
1868
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001869static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001871 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001872 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001873 switch (ipstats_mib_noroutes) {
1874 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001875 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001876 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001877 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1878 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001879 break;
1880 }
1881 /* FALLTHROUGH */
1882 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001883 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1884 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001885 break;
1886 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001887 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 kfree_skb(skb);
1889 return 0;
1890}
1891
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001892static int ip6_pkt_discard(struct sk_buff *skb)
1893{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001894 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001895}
1896
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001897static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898{
Eric Dumazetadf30902009-06-02 05:19:30 +00001899 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001900 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901}
1902
David S. Miller6723ab52006-10-18 21:20:57 -07001903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1904
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001905static int ip6_pkt_prohibit(struct sk_buff *skb)
1906{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001907 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001908}
1909
1910static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1911{
Eric Dumazetadf30902009-06-02 05:19:30 +00001912 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001913 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001914}
1915
David S. Miller6723ab52006-10-18 21:20:57 -07001916#endif
1917
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918/*
1919 * Allocate a dst for local (unicast / anycast) address.
1920 */
1921
1922struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1923 const struct in6_addr *addr,
1924 int anycast)
1925{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001926 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001927 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001928 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
1930 if (rt == NULL)
1931 return ERR_PTR(-ENOMEM);
1932
Daniel Lezcano55786892008-03-04 13:47:47 -08001933 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 in6_dev_hold(idev);
1935
1936 rt->u.dst.flags = DST_HOST;
1937 rt->u.dst.input = ip6_input;
1938 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001939 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 rt->rt6i_idev = idev;
1941 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001942 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1944 rt->u.dst.obsolete = -1;
1945
1946 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001947 if (anycast)
1948 rt->rt6i_flags |= RTF_ANYCAST;
1949 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001951 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1952 if (IS_ERR(neigh)) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001953 dst_free(&rt->u.dst);
David S. Miller14deae42009-01-04 16:04:39 -08001954
1955 /* We are casting this because that is the return
1956 * value type. But an errno encoded pointer is the
1957 * same regardless of the underlying pointer type,
1958 * and that's what we are returning. So this is OK.
1959 */
1960 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 }
David S. Miller14deae42009-01-04 16:04:39 -08001962 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963
1964 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1965 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001966 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967
1968 atomic_set(&rt->u.dst.__refcnt, 1);
1969
1970 return rt;
1971}
1972
/* Argument bundle handed to fib6_ifdown() through the table walkers. */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is spared */
};
1977
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978static int fib6_ifdown(struct rt6_info *rt, void *arg)
1979{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001980 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1981 struct net *net = ((struct arg_dev_net *)arg)->net;
1982
1983 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1984 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 RT6_TRACE("deleted by ifdown %p\n", rt);
1986 return -1;
1987 }
1988 return 0;
1989}
1990
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001991void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001993 struct arg_dev_net adn = {
1994 .dev = dev,
1995 .net = net,
1996 };
1997
1998 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001999 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000}
2001
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2007
2008static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2009{
2010 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2011 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002012 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013
2014 /* In IPv6 pmtu discovery is not optional,
2015 so that RTAX_MTU lock cannot disable it.
2016 We still use this lock to block changes
2017 caused by addrconf/ndisc.
2018 */
2019
2020 idev = __in6_dev_get(arg->dev);
2021 if (idev == NULL)
2022 return 0;
2023
2024 /* For administrative MTU increase, there is no way to discover
2025 IPv6 PMTU increase, so PMTU increase should be updated here.
2026 Since RFC 1981 doesn't include administrative MTU increase
2027 update PMTU increase is a MUST. (i.e. jumbo frame)
2028 */
2029 /*
2030 If new MTU is less than route PMTU, this new MTU will be the
2031 lowest MTU in the path, update the route PMTU to reflect PMTU
2032 decreases; if new MTU is greater than route PMTU, and the
2033 old MTU is the lowest MTU in the path, update the route PMTU
2034 to reflect the increase. In this case if the other nodes' MTU
2035 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2036 PMTU discouvery.
2037 */
2038 if (rt->rt6i_dev == arg->dev &&
2039 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08002040 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002041 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07002042 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002044 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002045 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 return 0;
2047}
2048
2049void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2050{
Thomas Grafc71099a2006-08-04 23:20:06 -07002051 struct rt6_mtu_change_arg arg = {
2052 .dev = dev,
2053 .mtu = mtu,
2054 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057}
2058
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002059static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002061 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002062 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002063 [RTA_PRIORITY] = { .type = NLA_U32 },
2064 [RTA_METRICS] = { .type = NLA_NESTED },
2065};
2066
2067static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2068 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069{
Thomas Graf86872cb2006-08-22 00:01:08 -07002070 struct rtmsg *rtm;
2071 struct nlattr *tb[RTA_MAX+1];
2072 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
Thomas Graf86872cb2006-08-22 00:01:08 -07002074 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2075 if (err < 0)
2076 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077
Thomas Graf86872cb2006-08-22 00:01:08 -07002078 err = -EINVAL;
2079 rtm = nlmsg_data(nlh);
2080 memset(cfg, 0, sizeof(*cfg));
2081
2082 cfg->fc_table = rtm->rtm_table;
2083 cfg->fc_dst_len = rtm->rtm_dst_len;
2084 cfg->fc_src_len = rtm->rtm_src_len;
2085 cfg->fc_flags = RTF_UP;
2086 cfg->fc_protocol = rtm->rtm_protocol;
2087
2088 if (rtm->rtm_type == RTN_UNREACHABLE)
2089 cfg->fc_flags |= RTF_REJECT;
2090
2091 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2092 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002093 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002094
2095 if (tb[RTA_GATEWAY]) {
2096 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2097 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002099
2100 if (tb[RTA_DST]) {
2101 int plen = (rtm->rtm_dst_len + 7) >> 3;
2102
2103 if (nla_len(tb[RTA_DST]) < plen)
2104 goto errout;
2105
2106 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002108
2109 if (tb[RTA_SRC]) {
2110 int plen = (rtm->rtm_src_len + 7) >> 3;
2111
2112 if (nla_len(tb[RTA_SRC]) < plen)
2113 goto errout;
2114
2115 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002117
2118 if (tb[RTA_OIF])
2119 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2120
2121 if (tb[RTA_PRIORITY])
2122 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2123
2124 if (tb[RTA_METRICS]) {
2125 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2126 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002128
2129 if (tb[RTA_TABLE])
2130 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2131
2132 err = 0;
2133errout:
2134 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135}
2136
Thomas Grafc127ea22007-03-22 11:58:32 -07002137static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138{
Thomas Graf86872cb2006-08-22 00:01:08 -07002139 struct fib6_config cfg;
2140 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141
Thomas Graf86872cb2006-08-22 00:01:08 -07002142 err = rtm_to_fib6_config(skb, nlh, &cfg);
2143 if (err < 0)
2144 return err;
2145
2146 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147}
2148
Thomas Grafc127ea22007-03-22 11:58:32 -07002149static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150{
Thomas Graf86872cb2006-08-22 00:01:08 -07002151 struct fib6_config cfg;
2152 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153
Thomas Graf86872cb2006-08-22 00:01:08 -07002154 err = rtm_to_fib6_config(skb, nlh, &cfg);
2155 if (err < 0)
2156 return err;
2157
2158 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159}
2160
Thomas Graf339bf982006-11-10 14:10:15 -08002161static inline size_t rt6_nlmsg_size(void)
2162{
2163 return NLMSG_ALIGN(sizeof(struct rtmsg))
2164 + nla_total_size(16) /* RTA_SRC */
2165 + nla_total_size(16) /* RTA_DST */
2166 + nla_total_size(16) /* RTA_GATEWAY */
2167 + nla_total_size(16) /* RTA_PREFSRC */
2168 + nla_total_size(4) /* RTA_TABLE */
2169 + nla_total_size(4) /* RTA_IIF */
2170 + nla_total_size(4) /* RTA_OIF */
2171 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002172 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002173 + nla_total_size(sizeof(struct rta_cacheinfo));
2174}
2175
Brian Haley191cd582008-08-14 15:33:21 -07002176static int rt6_fill_node(struct net *net,
2177 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002178 struct in6_addr *dst, struct in6_addr *src,
2179 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002180 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181{
2182 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002183 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002184 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002185 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186
2187 if (prefix) { /* user wants prefix routes only */
2188 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2189 /* success since this is not a prefix route */
2190 return 1;
2191 }
2192 }
2193
Thomas Graf2d7202b2006-08-22 00:01:27 -07002194 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2195 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002196 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002197
2198 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199 rtm->rtm_family = AF_INET6;
2200 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2201 rtm->rtm_src_len = rt->rt6i_src.plen;
2202 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002203 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002204 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002205 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002206 table = RT6_TABLE_UNSPEC;
2207 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002208 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 if (rt->rt6i_flags&RTF_REJECT)
2210 rtm->rtm_type = RTN_UNREACHABLE;
2211 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2212 rtm->rtm_type = RTN_LOCAL;
2213 else
2214 rtm->rtm_type = RTN_UNICAST;
2215 rtm->rtm_flags = 0;
2216 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2217 rtm->rtm_protocol = rt->rt6i_protocol;
2218 if (rt->rt6i_flags&RTF_DYNAMIC)
2219 rtm->rtm_protocol = RTPROT_REDIRECT;
2220 else if (rt->rt6i_flags & RTF_ADDRCONF)
2221 rtm->rtm_protocol = RTPROT_KERNEL;
2222 else if (rt->rt6i_flags&RTF_DEFAULT)
2223 rtm->rtm_protocol = RTPROT_RA;
2224
2225 if (rt->rt6i_flags&RTF_CACHE)
2226 rtm->rtm_flags |= RTM_F_CLONED;
2227
2228 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002229 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002230 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002232 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233#ifdef CONFIG_IPV6_SUBTREES
2234 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002235 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002236 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002238 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002240 if (iif) {
2241#ifdef CONFIG_IPV6_MROUTE
2242 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002243 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002244 if (err <= 0) {
2245 if (!nowait) {
2246 if (err == 0)
2247 return 0;
2248 goto nla_put_failure;
2249 } else {
2250 if (err == -EMSGSIZE)
2251 goto nla_put_failure;
2252 }
2253 }
2254 } else
2255#endif
2256 NLA_PUT_U32(skb, RTA_IIF, iif);
2257 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002258 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002260 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002261 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002262 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002264
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002266 goto nla_put_failure;
2267
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002269 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2270
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002272 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2273
2274 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002275
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002276 if (!(rt->rt6i_flags & RTF_EXPIRES))
2277 expires = 0;
2278 else if (rt->rt6i_expires - jiffies < INT_MAX)
2279 expires = rt->rt6i_expires - jiffies;
2280 else
2281 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002282
Thomas Grafe3703b32006-11-27 09:27:07 -08002283 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2284 expires, rt->u.dst.error) < 0)
2285 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286
Thomas Graf2d7202b2006-08-22 00:01:27 -07002287 return nlmsg_end(skb, nlh);
2288
2289nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002290 nlmsg_cancel(skb, nlh);
2291 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292}
2293
Patrick McHardy1b43af52006-08-10 23:11:17 -07002294int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295{
2296 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2297 int prefix;
2298
Thomas Graf2d7202b2006-08-22 00:01:27 -07002299 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2300 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2302 } else
2303 prefix = 0;
2304
Brian Haley191cd582008-08-14 15:33:21 -07002305 return rt6_fill_node(arg->net,
2306 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002308 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309}
2310
Thomas Grafc127ea22007-03-22 11:58:32 -07002311static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002313 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002314 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002316 struct sk_buff *skb;
2317 struct rtmsg *rtm;
2318 struct flowi fl;
2319 int err, iif = 0;
2320
2321 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2322 if (err < 0)
2323 goto errout;
2324
2325 err = -EINVAL;
2326 memset(&fl, 0, sizeof(fl));
2327
2328 if (tb[RTA_SRC]) {
2329 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2330 goto errout;
2331
2332 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2333 }
2334
2335 if (tb[RTA_DST]) {
2336 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2337 goto errout;
2338
2339 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2340 }
2341
2342 if (tb[RTA_IIF])
2343 iif = nla_get_u32(tb[RTA_IIF]);
2344
2345 if (tb[RTA_OIF])
2346 fl.oif = nla_get_u32(tb[RTA_OIF]);
2347
2348 if (iif) {
2349 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002350 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002351 if (!dev) {
2352 err = -ENODEV;
2353 goto errout;
2354 }
2355 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356
2357 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002358 if (skb == NULL) {
2359 err = -ENOBUFS;
2360 goto errout;
2361 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362
2363 /* Reserve room for dummy headers, this skb can pass
2364 through good chunk of routing engine.
2365 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002366 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2368
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002369 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Eric Dumazetadf30902009-06-02 05:19:30 +00002370 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371
Brian Haley191cd582008-08-14 15:33:21 -07002372 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002374 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002376 kfree_skb(skb);
2377 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 }
2379
Daniel Lezcano55786892008-03-04 13:47:47 -08002380 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002381errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383}
2384
Thomas Graf86872cb2006-08-22 00:01:08 -07002385void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386{
2387 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002388 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002389 u32 seq;
2390 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002392 err = -ENOBUFS;
2393 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002394
Thomas Graf339bf982006-11-10 14:10:15 -08002395 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002396 if (skb == NULL)
2397 goto errout;
2398
Brian Haley191cd582008-08-14 15:33:21 -07002399 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002400 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002401 if (err < 0) {
2402 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2403 WARN_ON(err == -EMSGSIZE);
2404 kfree_skb(skb);
2405 goto errout;
2406 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002407 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2408 info->nlh, gfp_any());
2409 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002410errout:
2411 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002412 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413}
2414
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002415static int ip6_route_dev_notify(struct notifier_block *this,
2416 unsigned long event, void *data)
2417{
2418 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002419 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002420
2421 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2422 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2424#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2425 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2426 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2427 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2428 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2429#endif
2430 }
2431
2432 return NOTIFY_OK;
2433}
2434
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435/*
2436 * /proc
2437 */
2438
2439#ifdef CONFIG_PROC_FS
2440
2441#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2442
/*
 * Buffer bookkeeping for /proc route output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2451
2452static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2453{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002454 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002456 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457
2458#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002459 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002461 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462#endif
2463
2464 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002465 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002467 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002469 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2470 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2471 rt->u.dst.__use, rt->rt6i_flags,
2472 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 return 0;
2474}
2475
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002476static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002478 struct net *net = (struct net *)m->private;
2479 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002480 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481}
2482
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002483static int ipv6_route_open(struct inode *inode, struct file *file)
2484{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002485 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002486}
2487
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002488static const struct file_operations ipv6_route_proc_fops = {
2489 .owner = THIS_MODULE,
2490 .open = ipv6_route_open,
2491 .read = seq_read,
2492 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002493 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002494};
2495
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2497{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002498 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002500 net->ipv6.rt6_stats->fib_nodes,
2501 net->ipv6.rt6_stats->fib_route_nodes,
2502 net->ipv6.rt6_stats->fib_rt_alloc,
2503 net->ipv6.rt6_stats->fib_rt_entries,
2504 net->ipv6.rt6_stats->fib_rt_cache,
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002505 atomic_read(&net->ipv6.ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002506 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507
2508 return 0;
2509}
2510
2511static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2512{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002513 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002514}
2515
Arjan van de Ven9a321442007-02-12 00:55:35 -08002516static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517 .owner = THIS_MODULE,
2518 .open = rt6_stats_seq_open,
2519 .read = seq_read,
2520 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002521 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522};
2523#endif /* CONFIG_PROC_FS */
2524
2525#ifdef CONFIG_SYSCTL
2526
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002528int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 void __user *buffer, size_t *lenp, loff_t *ppos)
2530{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002531 struct net *net = current->nsproxy->net_ns;
2532 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002534 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002535 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 return 0;
2537 } else
2538 return -EINVAL;
2539}
2540
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002541ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002542 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002544 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002546 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002547 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 },
2549 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002550 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002551 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 .maxlen = sizeof(int),
2553 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002554 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 },
2556 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002558 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 .maxlen = sizeof(int),
2560 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002561 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 },
2563 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002565 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 .maxlen = sizeof(int),
2567 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002568 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 },
2570 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573 .maxlen = sizeof(int),
2574 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002575 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 },
2577 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002579 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580 .maxlen = sizeof(int),
2581 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002582 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 },
2584 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002586 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587 .maxlen = sizeof(int),
2588 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002589 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 },
2591 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002593 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 .maxlen = sizeof(int),
2595 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002596 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 },
2598 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002600 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601 .maxlen = sizeof(int),
2602 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002603 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 },
2605 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002607 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 .maxlen = sizeof(int),
2609 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002610 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002612 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613};
2614
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002615struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2616{
2617 struct ctl_table *table;
2618
2619 table = kmemdup(ipv6_route_table_template,
2620 sizeof(ipv6_route_table_template),
2621 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002622
2623 if (table) {
2624 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002625 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002626 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2627 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2628 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2629 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2630 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2631 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2632 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002633 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002634 }
2635
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002636 return table;
2637}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638#endif
2639
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002640static int ip6_route_net_init(struct net *net)
2641{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002642 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002643
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002644 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2645 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002646
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002647 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2648 sizeof(*net->ipv6.ip6_null_entry),
2649 GFP_KERNEL);
2650 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002651 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002652 net->ipv6.ip6_null_entry->u.dst.path =
2653 (struct dst_entry *)net->ipv6.ip6_null_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002654 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002655
2656#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2657 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2658 sizeof(*net->ipv6.ip6_prohibit_entry),
2659 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002660 if (!net->ipv6.ip6_prohibit_entry)
2661 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002662 net->ipv6.ip6_prohibit_entry->u.dst.path =
2663 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002664 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002665
2666 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2667 sizeof(*net->ipv6.ip6_blk_hole_entry),
2668 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002669 if (!net->ipv6.ip6_blk_hole_entry)
2670 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002671 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2672 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002673 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002674#endif
2675
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002676 net->ipv6.sysctl.flush_delay = 0;
2677 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2678 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2679 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2680 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2681 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2682 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2683 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2684
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002685#ifdef CONFIG_PROC_FS
2686 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2687 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2688#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002689 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2690
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002691 ret = 0;
2692out:
2693 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002694
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002695#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2696out_ip6_prohibit_entry:
2697 kfree(net->ipv6.ip6_prohibit_entry);
2698out_ip6_null_entry:
2699 kfree(net->ipv6.ip6_null_entry);
2700#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002701out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002702 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002703}
2704
2705static void ip6_route_net_exit(struct net *net)
2706{
2707#ifdef CONFIG_PROC_FS
2708 proc_net_remove(net, "ipv6_route");
2709 proc_net_remove(net, "rt6_stats");
2710#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002711 kfree(net->ipv6.ip6_null_entry);
2712#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2713 kfree(net->ipv6.ip6_prohibit_entry);
2714 kfree(net->ipv6.ip6_blk_hole_entry);
2715#endif
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002716}
2717
2718static struct pernet_operations ip6_route_net_ops = {
2719 .init = ip6_route_net_init,
2720 .exit = ip6_route_net_exit,
2721};
2722
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002723static struct notifier_block ip6_route_dev_notifier = {
2724 .notifier_call = ip6_route_dev_notify,
2725 .priority = 0,
2726};
2727
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002728int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002730 int ret;
2731
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002732 ret = -ENOMEM;
2733 ip6_dst_ops_template.kmem_cachep =
2734 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2735 SLAB_HWCACHE_ALIGN, NULL);
2736 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002737 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002738
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002739 ret = register_pernet_subsys(&ip6_route_net_ops);
2740 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002741 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002742
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002743 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2744
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002745 /* Registering of the loopback is done before this portion of code,
2746 * the loopback reference in rt6_info will not be taken, do it
2747 * manually for init_net */
2748 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2749 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2750 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2751 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2752 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2753 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2754 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2755 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002756 ret = fib6_init();
2757 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002758 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002759
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002760 ret = xfrm6_init();
2761 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002762 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002763
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002764 ret = fib6_rules_init();
2765 if (ret)
2766 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002767
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002768 ret = -ENOBUFS;
2769 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2770 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2771 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2772 goto fib6_rules_init;
2773
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002774 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002775 if (ret)
2776 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002777
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002778out:
2779 return ret;
2780
2781fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002782 fib6_rules_cleanup();
2783xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002784 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002785out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002786 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002787out_register_subsys:
2788 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002789out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002791 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792}
2793
2794void ip6_route_cleanup(void)
2795{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002796 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002797 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002799 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002800 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002801 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002802}