/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * RDBG() takes a fully parenthesized printk argument list;
 * RT6_TRACE() takes printk-style arguments and logs at KERN_DEBUG.
 * Both expand to nothing unless RT6_DEBUG >= 3.
 */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* When non-zero, ip6_pol_route() clones routes via rt6_alloc_clone(). */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080090static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080094static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 struct in6_addr *gwaddr, int ifindex);
97#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800101 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Eric Dumazete2422972008-01-30 20:07:45 -0800111 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Eric Dumazete2422972008-01-30 20:07:45 -0800124 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700125};
126
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800127static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700140 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" entry: a reject route whose dst carries
 * -EACCES and whose handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: a reject route whose dst carries
 * -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
187
/*
 * Allocate a dst through dst_alloc() using the given ops and return it
 * viewed as an rt6_info.  The result of dst_alloc() is passed through
 * unchanged, so callers must check it before use.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	return (struct rt6_info *)dst_alloc(ops);
}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800210 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900211 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
Thomas Grafc71099a2006-08-04 23:20:06 -0700229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700233}
234
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700236 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 */
238
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900241 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700260 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900262 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900275 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 if (local)
277 return local;
278
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700279 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800280 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900282out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 return rt;
284}
285
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the next hop of @rt when its
 * neighbour entry is not in a NUD_VALID state and the device's
 * rtr_probe_interval has elapsed since the entry was last updated.
 * Does nothing when @rt is NULL or has no next-hop neighbour.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/*
		 * NOTE(review): 'updated' is written while only the read
		 * lock is held -- confirm this is intended.
		 */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700328 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Dave Jonesb6f99a22007-03-22 12:27:49 -0700336static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800352 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800354 } else
355 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800356 return m;
357}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
361{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900363
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700364 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700370 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 return -1;
373 return m;
374}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
David S. Millerf11e6652007-03-24 20:36:25 -0700376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378{
David S. Millerf11e6652007-03-24 20:36:25 -0700379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 return match;
417}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418
David S. Millerf11e6652007-03-24 20:36:25 -0700419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800422 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800425 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
David S. Millerf11e6652007-03-24 20:36:25 -0700427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
David S. Millerf11e6652007-03-24 20:36:25 -0700431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 }
444
David S. Millerf11e6652007-03-24 20:36:25 -0700445 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800446 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900448 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800449 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450}
451
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt points at the option (interpreted as struct route_info), @len is
 * its length in bytes and @gwaddr the gateway the route refers to.
 *
 * The option is validated, then the matching route is looked up:
 *   - lifetime 0 deletes an existing route;
 *   - a non-zero lifetime adds the route if missing, otherwise updates
 *     its preference bits;
 *   - the expiry is then set or cleared depending on whether the
 *     lifetime is finite.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
525
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree after a failed match.
 *
 * Relies on names from the expanding function: the variables 'rt' and
 * 'fn' and the labels 'out' and 'restart'.  When 'rt' is the null
 * entry of @__net, walk from 'fn' towards the root; where a parent has
 * a source subtree other than 'fn', look @saddr up in that subtree.
 * Jump to 'restart' at the first node carrying route info, or to 'out'
 * once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700543
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800556 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800558 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700560 return rt;
561
562}
563
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 },
573 },
574 };
575 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577
Thomas Grafadaa70b2006-10-13 15:01:03 -0700578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return NULL;
590}
591
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900592EXPORT_SYMBOL(rt6_lookup);
593
Thomas Grafc71099a2006-08-04 23:20:06 -0700594/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
Thomas Graf86872cb2006-08-22 00:01:08 -0700600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700603 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700607 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
610 return err;
611}
612
Thomas Graf40e22e82006-08-22 00:00:45 -0700613int ip6_ins_rt(struct rt6_info *rt)
614{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800615 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900616 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800617 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800618 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700619}
620
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900641 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
David S. Miller14deae42009-01-04 16:04:39 -0800655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
668 ip6_dst_gc(net->ipv6.ip6_dst_ops);
669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800685 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800687 return rt;
688}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705{
706 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700708 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700713 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700716 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800718restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700722 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800726 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800727 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800729 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700730 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800731
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 else {
735#if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737#else
738 goto out2;
739#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800741
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800742 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800743 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800744
745 dst_hold(&rt->u.dst);
746 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700747 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800748 if (!err)
749 goto out2;
750 }
751
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800767 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
773 return rt;
774}
775
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700777 struct flowi *fl, int flags)
778{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800779 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700780}
781
Thomas Grafc71099a2006-08-04 23:20:06 -0700782void ip6_route_input(struct sk_buff *skb)
783{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700784 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900785 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700794 },
795 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900796 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700797 .proto = iph->nexthdr,
798 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700801 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700802
Eric Dumazetadf30902009-06-02 05:19:30 +0000803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700804}
805
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 struct flowi *fl, int flags)
808{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800809 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700810}
811
Daniel Lezcano4591db42008-03-05 10:48:10 -0800812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700818 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700819
Thomas Grafadaa70b2006-10-13 15:01:03 -0700820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
830 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700831
Daniel Lezcano4591db42008-03-05 10:48:10 -0800832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833}
834
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900835EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836
David S. Miller14e50e52007-05-24 18:17:54 -0700837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
838{
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
843
844 if (rt) {
845 new = &rt->u.dst;
846
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800849 new->input = dst_discard;
850 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700851
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
860
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
864
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866#ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868#endif
869
870 dst_free(new);
871 }
872
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
876}
877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
878
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879/*
880 * Destination cache support functions
881 */
882
883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
884{
885 struct rt6_info *rt;
886
887 rt = (struct rt6_info *) dst;
888
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
891
892 return NULL;
893}
894
895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
896{
897 struct rt6_info *rt = (struct rt6_info *) dst;
898
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700901 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 else
903 dst_release(dst);
904 }
905 return NULL;
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
913
Eric Dumazetadf30902009-06-02 05:19:30 +0000914 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937}
938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939static int ipv6_get_mtu(struct net_device *dev);
940
Daniel Lezcano55786892008-03-04 13:47:47 -0800941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
Daniel Lezcano55786892008-03-04 13:47:47 -0800945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947
948 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800959static struct dst_entry *icmp6_dst_gc_list;
960static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700961
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900964 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900968 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969
970 if (unlikely(idev == NULL))
971 return NULL;
972
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800973 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800982 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800995 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
Daniel Lezcano55786892008-03-04 13:47:47 -08001010 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001013 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014}
1015
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001016int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017{
1018 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001019 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
1021 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001022
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 } else {
1031 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001032 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 }
1034 }
1035
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001036 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001037
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001038 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039}
1040
David S. Miller1e493d12008-09-10 17:27:15 -07001041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
Daniel Lezcano569d3642008-01-18 03:56:57 -08001060static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 unsigned long now = jiffies;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001063 struct net *net = ops->dst_net;
1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069
Daniel Lezcano7019b782008-03-04 13:50:14 -08001070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 goto out;
1073
Benjamin Thery6891a342008-03-04 13:49:47 -08001074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082}
1083
1084/* Clean host part of a prefix. Not necessary in radix tree,
1085 but results in cleaner routing tables.
1086
1087 Remove it only when all the things will work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1094
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1099 }
1100 return mtu;
1101}
1102
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001103int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 }
1115 return hoplimit;
1116}
1117
1118/*
1119 *
1120 */
1121
Thomas Graf86872cb2006-08-22 00:01:08 -07001122int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123{
1124 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001125 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001129 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 int addr_type;
1131
Thomas Graf86872cb2006-08-22 00:01:08 -07001132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001135 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 return -EINVAL;
1137#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001138 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001140 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
Thomas Graf86872cb2006-08-22 00:01:08 -07001148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150
Daniel Lezcano55786892008-03-04 13:47:47 -08001151 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001157 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
1164 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
Thomas Graf86872cb2006-08-22 00:01:08 -07001169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1179
1180 rt->u.dst.output = ip6_output;
1181
Thomas Graf86872cb2006-08-22 00:01:08 -07001182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190#endif
1191
Thomas Graf86872cb2006-08-22 00:01:08 -07001192 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1196 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001197 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001200 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1204 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001205 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1211 }
1212 }
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1218 }
1219
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 struct in6_addr *gw_addr;
1222 int gwa_type;
1223
Thomas Graf86872cb2006-08-22 00:01:08 -07001224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 gwa_type = ipv6_addr_type(gw_addr);
1227
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1230
1231 /* IPv6 strictly inhibits using not link-local
1232 addresses as nexthop address.
1233 Otherwise, router will not able to send redirects.
1234 It is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1237 */
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1241
Daniel Lezcano55786892008-03-04 13:47:47 -08001242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1251 }
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1257 }
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1261
1262 if (err)
1263 goto out;
1264 }
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1268 }
1269
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1280 }
1281 }
1282
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
1285install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Thomas Graf86872cb2006-08-22 00:01:08 -07001290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001291 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001292
1293 if (type) {
1294 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 err = -EINVAL;
1296 goto out;
1297 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001298
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302 }
1303
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001306 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001312 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001313
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001314 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317
1318out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001324 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 return err;
1326}
1327
Thomas Graf86872cb2006-08-22 00:01:08 -07001328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329{
1330 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001331 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001332 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001334 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001335 return -ENOENT;
1336
Thomas Grafc71099a2006-08-04 23:20:06 -07001337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
Thomas Graf86872cb2006-08-22 00:01:08 -07001340 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 dst_release(&rt->u.dst);
1342
Thomas Grafc71099a2006-08-04 23:20:06 -07001343 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
1345 return err;
1346}
1347
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001348int ip6_del_rt(struct rt6_info *rt)
1349{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001350 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001351 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001352 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001353 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001354}
1355
Thomas Graf86872cb2006-08-22 00:01:08 -07001356static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357{
Thomas Grafc71099a2006-08-04 23:20:06 -07001358 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1362
Daniel Lezcano55786892008-03-04 13:47:47 -08001363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001364 if (table == NULL)
1365 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366
Thomas Grafc71099a2006-08-04 23:20:06 -07001367 read_lock_bh(&table->tb6_lock);
1368
1369 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001372
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001375 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 continue;
1384 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001385 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
Thomas Graf86872cb2006-08-22 00:01:08 -07001387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 }
1389 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001390 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391
1392 return err;
1393}
1394
1395/*
1396 * Handle redirects
1397 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001398struct ip6rd_flowi {
1399 struct flowi fl;
1400 struct in6_addr gateway;
1401};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001405 struct flowi *fl,
1406 int flags)
1407{
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
1410 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001411
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001413 * Get the "current" route for this destination and
1414 * check if the redirect has come from approriate router.
1415 *
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422
Thomas Grafc71099a2006-08-04 23:20:06 -07001423 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001425restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001427 /*
1428 * Current route is on-link; redirect is always invalid.
1429 *
1430 * Seems, previous statement is not true. It could
1431 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432 * But then router serving it might decide, that we should
1433 * know truth 8)8) --ANK (980726).
1434 */
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001439 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001440 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001442 continue;
1443 break;
1444 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001445
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001446 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001449out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001450 dst_hold(&rt->u.dst);
1451
1452 read_unlock_bh(&table->tb6_lock);
1453
1454 return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1461{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001462 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001463 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1471 },
1472 },
1473 },
1474 .gateway = *gateway,
1475 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001476
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479
Daniel Lezcano55786892008-03-04 13:47:47 -08001480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001481 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001490 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001491
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001494 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001498 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 }
1500
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 /*
1502 * We have finally decided to accept it.
1503 */
1504
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001505 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1510 );
1511
1512 /*
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1516 */
1517 dst_confirm(&rt->u.dst);
1518
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->u.dst.neighbour)
1521 goto out;
1522
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1526
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->u.dst.flags |= DST_HOST;
1534
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001540 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541
Thomas Graf40e22e82006-08-22 00:00:45 -07001542 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 goto out;
1544
Tom Tucker8d717402006-07-30 20:43:36 -07001545 netevent.old = &rt->u.dst;
1546 netevent.new = &nrt->u.dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001550 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 return;
1552 }
1553
1554out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001555 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556 return;
1557}
1558
1559/*
1560 * Handle ICMP "packet too big" messages
1561 * i.e. Path MTU discovery
1562 */
1563
1564void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1565 struct net_device *dev, u32 pmtu)
1566{
1567 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001568 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 int allfrag = 0;
1570
Daniel Lezcano55786892008-03-04 13:47:47 -08001571 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 if (rt == NULL)
1573 return;
1574
1575 if (pmtu >= dst_mtu(&rt->u.dst))
1576 goto out;
1577
1578 if (pmtu < IPV6_MIN_MTU) {
1579 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1584 */
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1587 }
1588
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1592 */
1593 dst_confirm(&rt->u.dst);
1594
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1599 */
1600 if (rt->rt6i_flags & RTF_CACHE) {
1601 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1602 if (allfrag)
1603 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001604 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606 goto out;
1607 }
1608
1609 /* Network route.
1610 Two cases are possible:
1611 1. It is connected route. Action: COW
1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001615 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001616 else
1617 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001618
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001619 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001620 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1621 if (allfrag)
1622 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1623
1624 /* According to RFC 1981, detecting PMTU increase shouldn't be
1625 * happened within 5 mins, the recommended timer is 10 mins.
1626 * Here this route expiration time is set to ip6_rt_mtu_expires
1627 * which is 10 mins. After 10 mins the decreased pmtu is expired
1628 * and detecting PMTU increase will be automatically happened.
1629 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001630 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001631 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1632
Thomas Graf40e22e82006-08-22 00:00:45 -07001633 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635out:
1636 dst_release(&rt->u.dst);
1637}
1638
1639/*
1640 * Misc support functions
1641 */
1642
1643static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1644{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001645 struct net *net = dev_net(ort->rt6i_dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001646 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647
1648 if (rt) {
1649 rt->u.dst.input = ort->u.dst.input;
1650 rt->u.dst.output = ort->u.dst.output;
1651
1652 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001653 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 rt->u.dst.dev = ort->u.dst.dev;
1655 if (rt->u.dst.dev)
1656 dev_hold(rt->u.dst.dev);
1657 rt->rt6i_idev = ort->rt6i_idev;
1658 if (rt->rt6i_idev)
1659 in6_dev_hold(rt->rt6i_idev);
1660 rt->u.dst.lastuse = jiffies;
1661 rt->rt6i_expires = 0;
1662
1663 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1664 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1665 rt->rt6i_metric = 0;
1666
1667 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1668#ifdef CONFIG_IPV6_SUBTREES
1669 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1670#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001671 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 }
1673 return rt;
1674}
1675
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001676#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001677static struct rt6_info *rt6_get_route_info(struct net *net,
1678 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001679 struct in6_addr *gwaddr, int ifindex)
1680{
1681 struct fib6_node *fn;
1682 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001683 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001684
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001685 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001686 if (table == NULL)
1687 return NULL;
1688
1689 write_lock_bh(&table->tb6_lock);
1690 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001691 if (!fn)
1692 goto out;
1693
Eric Dumazet7cc48262007-02-09 16:22:57 -08001694 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001695 if (rt->rt6i_dev->ifindex != ifindex)
1696 continue;
1697 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1698 continue;
1699 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1700 continue;
1701 dst_hold(&rt->u.dst);
1702 break;
1703 }
1704out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001705 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001706 return rt;
1707}
1708
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001709static struct rt6_info *rt6_add_route_info(struct net *net,
1710 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001711 struct in6_addr *gwaddr, int ifindex,
1712 unsigned pref)
1713{
Thomas Graf86872cb2006-08-22 00:01:08 -07001714 struct fib6_config cfg = {
1715 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001716 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001717 .fc_ifindex = ifindex,
1718 .fc_dst_len = prefixlen,
1719 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1720 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001721 .fc_nlinfo.pid = 0,
1722 .fc_nlinfo.nlh = NULL,
1723 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001724 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001725
Thomas Graf86872cb2006-08-22 00:01:08 -07001726 ipv6_addr_copy(&cfg.fc_dst, prefix);
1727 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1728
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001729 /* We should treat it as a default route if prefix length is 0. */
1730 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001731 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001732
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001734
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001735 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001736}
1737#endif
1738
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001740{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001742 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001744 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001745 if (table == NULL)
1746 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747
Thomas Grafc71099a2006-08-04 23:20:06 -07001748 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001749 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001751 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1753 break;
1754 }
1755 if (rt)
1756 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001757 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 return rt;
1759}
1760
1761struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001762 struct net_device *dev,
1763 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764{
Thomas Graf86872cb2006-08-22 00:01:08 -07001765 struct fib6_config cfg = {
1766 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001767 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001768 .fc_ifindex = dev->ifindex,
1769 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1770 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001771 .fc_nlinfo.pid = 0,
1772 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001773 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001774 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775
Thomas Graf86872cb2006-08-22 00:01:08 -07001776 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777
Thomas Graf86872cb2006-08-22 00:01:08 -07001778 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 return rt6_get_dflt_router(gwaddr, dev);
1781}
1782
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001783void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784{
1785 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001786 struct fib6_table *table;
1787
1788 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001789 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001790 if (table == NULL)
1791 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792
1793restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001794 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001795 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1797 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001798 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001799 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 goto restart;
1801 }
1802 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001803 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804}
1805
Daniel Lezcano55786892008-03-04 13:47:47 -08001806static void rtmsg_to_fib6_config(struct net *net,
1807 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001808 struct fib6_config *cfg)
1809{
1810 memset(cfg, 0, sizeof(*cfg));
1811
1812 cfg->fc_table = RT6_TABLE_MAIN;
1813 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1814 cfg->fc_metric = rtmsg->rtmsg_metric;
1815 cfg->fc_expires = rtmsg->rtmsg_info;
1816 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1817 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1818 cfg->fc_flags = rtmsg->rtmsg_flags;
1819
Daniel Lezcano55786892008-03-04 13:47:47 -08001820 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001821
Thomas Graf86872cb2006-08-22 00:01:08 -07001822 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1823 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1824 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1825}
1826
Daniel Lezcano55786892008-03-04 13:47:47 -08001827int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828{
Thomas Graf86872cb2006-08-22 00:01:08 -07001829 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 struct in6_rtmsg rtmsg;
1831 int err;
1832
1833 switch(cmd) {
1834 case SIOCADDRT: /* Add a route */
1835 case SIOCDELRT: /* Delete a route */
1836 if (!capable(CAP_NET_ADMIN))
1837 return -EPERM;
1838 err = copy_from_user(&rtmsg, arg,
1839 sizeof(struct in6_rtmsg));
1840 if (err)
1841 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001842
Daniel Lezcano55786892008-03-04 13:47:47 -08001843 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001844
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845 rtnl_lock();
1846 switch (cmd) {
1847 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001848 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 break;
1850 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 break;
1853 default:
1854 err = -EINVAL;
1855 }
1856 rtnl_unlock();
1857
1858 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001859 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860
1861 return -EINVAL;
1862}
1863
1864/*
1865 * Drop the packet on the floor
1866 */
1867
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001868static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001870 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001871 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001872 switch (ipstats_mib_noroutes) {
1873 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001874 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001875 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1877 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001878 break;
1879 }
1880 /* FALLTHROUGH */
1881 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001882 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1883 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001884 break;
1885 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001886 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 kfree_skb(skb);
1888 return 0;
1889}
1890
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001891static int ip6_pkt_discard(struct sk_buff *skb)
1892{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001894}
1895
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001896static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897{
Eric Dumazetadf30902009-06-02 05:19:30 +00001898 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001899 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900}
1901
David S. Miller6723ab52006-10-18 21:20:57 -07001902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1903
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001904static int ip6_pkt_prohibit(struct sk_buff *skb)
1905{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001907}
1908
1909static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1910{
Eric Dumazetadf30902009-06-02 05:19:30 +00001911 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001912 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001913}
1914
David S. Miller6723ab52006-10-18 21:20:57 -07001915#endif
1916
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917/*
1918 * Allocate a dst for local (unicast / anycast) address.
1919 */
1920
1921struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1922 const struct in6_addr *addr,
1923 int anycast)
1924{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001925 struct net *net = dev_net(idev->dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001926 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001927 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928
1929 if (rt == NULL)
1930 return ERR_PTR(-ENOMEM);
1931
Daniel Lezcano55786892008-03-04 13:47:47 -08001932 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 in6_dev_hold(idev);
1934
1935 rt->u.dst.flags = DST_HOST;
1936 rt->u.dst.input = ip6_input;
1937 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001938 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 rt->rt6i_idev = idev;
1940 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1943 rt->u.dst.obsolete = -1;
1944
1945 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001946 if (anycast)
1947 rt->rt6i_flags |= RTF_ANYCAST;
1948 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001950 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1951 if (IS_ERR(neigh)) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001952 dst_free(&rt->u.dst);
David S. Miller14deae42009-01-04 16:04:39 -08001953
1954 /* We are casting this because that is the return
1955 * value type. But an errno encoded pointer is the
1956 * same regardless of the underlying pointer type,
1957 * and that's what we are returning. So this is OK.
1958 */
1959 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 }
David S. Miller14deae42009-01-04 16:04:39 -08001961 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962
1963 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1964 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001965 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966
1967 atomic_set(&rt->u.dst.__refcnt, 1);
1968
1969 return rt;
1970}
1971
/* Argument bundle handed to fib6_ifdown() through its void pointer. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
1976
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977static int fib6_ifdown(struct rt6_info *rt, void *arg)
1978{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001979 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1980 struct net *net = ((struct arg_dev_net *)arg)->net;
1981
1982 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1983 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984 RT6_TRACE("deleted by ifdown %p\n", rt);
1985 return -1;
1986 }
1987 return 0;
1988}
1989
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001990void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001992 struct arg_dev_net adn = {
1993 .dev = dev,
1994 .net = net,
1995 };
1996
1997 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001998 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999}
2000
/* Argument bundle handed to rt6_mtu_change_route() through its void pointer. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2006
2007static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2008{
2009 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2010 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002011 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012
2013 /* In IPv6 pmtu discovery is not optional,
2014 so that RTAX_MTU lock cannot disable it.
2015 We still use this lock to block changes
2016 caused by addrconf/ndisc.
2017 */
2018
2019 idev = __in6_dev_get(arg->dev);
2020 if (idev == NULL)
2021 return 0;
2022
2023 /* For administrative MTU increase, there is no way to discover
2024 IPv6 PMTU increase, so PMTU increase should be updated here.
2025 Since RFC 1981 doesn't include administrative MTU increase
2026 update PMTU increase is a MUST. (i.e. jumbo frame)
2027 */
2028 /*
2029 If new MTU is less than route PMTU, this new MTU will be the
2030 lowest MTU in the path, update the route PMTU to reflect PMTU
2031 decreases; if new MTU is greater than route PMTU, and the
2032 old MTU is the lowest MTU in the path, update the route PMTU
2033 to reflect the increase. In this case if the other nodes' MTU
2034 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2035 PMTU discouvery.
2036 */
2037 if (rt->rt6i_dev == arg->dev &&
2038 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08002039 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002040 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07002041 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002043 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002044 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 return 0;
2046}
2047
2048void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2049{
Thomas Grafc71099a2006-08-04 23:20:06 -07002050 struct rt6_mtu_change_arg arg = {
2051 .dev = dev,
2052 .mtu = mtu,
2053 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002055 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056}
2057
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002058static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002059 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002060 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002061 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002062 [RTA_PRIORITY] = { .type = NLA_U32 },
2063 [RTA_METRICS] = { .type = NLA_NESTED },
2064};
2065
2066static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2067 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068{
Thomas Graf86872cb2006-08-22 00:01:08 -07002069 struct rtmsg *rtm;
2070 struct nlattr *tb[RTA_MAX+1];
2071 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072
Thomas Graf86872cb2006-08-22 00:01:08 -07002073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2074 if (err < 0)
2075 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
Thomas Graf86872cb2006-08-22 00:01:08 -07002077 err = -EINVAL;
2078 rtm = nlmsg_data(nlh);
2079 memset(cfg, 0, sizeof(*cfg));
2080
2081 cfg->fc_table = rtm->rtm_table;
2082 cfg->fc_dst_len = rtm->rtm_dst_len;
2083 cfg->fc_src_len = rtm->rtm_src_len;
2084 cfg->fc_flags = RTF_UP;
2085 cfg->fc_protocol = rtm->rtm_protocol;
2086
2087 if (rtm->rtm_type == RTN_UNREACHABLE)
2088 cfg->fc_flags |= RTF_REJECT;
2089
2090 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2091 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002092 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002093
2094 if (tb[RTA_GATEWAY]) {
2095 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2096 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002098
2099 if (tb[RTA_DST]) {
2100 int plen = (rtm->rtm_dst_len + 7) >> 3;
2101
2102 if (nla_len(tb[RTA_DST]) < plen)
2103 goto errout;
2104
2105 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002107
2108 if (tb[RTA_SRC]) {
2109 int plen = (rtm->rtm_src_len + 7) >> 3;
2110
2111 if (nla_len(tb[RTA_SRC]) < plen)
2112 goto errout;
2113
2114 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002116
2117 if (tb[RTA_OIF])
2118 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2119
2120 if (tb[RTA_PRIORITY])
2121 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2122
2123 if (tb[RTA_METRICS]) {
2124 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2125 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002127
2128 if (tb[RTA_TABLE])
2129 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2130
2131 err = 0;
2132errout:
2133 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134}
2135
Thomas Grafc127ea22007-03-22 11:58:32 -07002136static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137{
Thomas Graf86872cb2006-08-22 00:01:08 -07002138 struct fib6_config cfg;
2139 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140
Thomas Graf86872cb2006-08-22 00:01:08 -07002141 err = rtm_to_fib6_config(skb, nlh, &cfg);
2142 if (err < 0)
2143 return err;
2144
2145 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146}
2147
Thomas Grafc127ea22007-03-22 11:58:32 -07002148static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149{
Thomas Graf86872cb2006-08-22 00:01:08 -07002150 struct fib6_config cfg;
2151 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152
Thomas Graf86872cb2006-08-22 00:01:08 -07002153 err = rtm_to_fib6_config(skb, nlh, &cfg);
2154 if (err < 0)
2155 return err;
2156
2157 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158}
2159
Thomas Graf339bf982006-11-10 14:10:15 -08002160static inline size_t rt6_nlmsg_size(void)
2161{
2162 return NLMSG_ALIGN(sizeof(struct rtmsg))
2163 + nla_total_size(16) /* RTA_SRC */
2164 + nla_total_size(16) /* RTA_DST */
2165 + nla_total_size(16) /* RTA_GATEWAY */
2166 + nla_total_size(16) /* RTA_PREFSRC */
2167 + nla_total_size(4) /* RTA_TABLE */
2168 + nla_total_size(4) /* RTA_IIF */
2169 + nla_total_size(4) /* RTA_OIF */
2170 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002171 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002172 + nla_total_size(sizeof(struct rta_cacheinfo));
2173}
2174
Brian Haley191cd582008-08-14 15:33:21 -07002175static int rt6_fill_node(struct net *net,
2176 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002177 struct in6_addr *dst, struct in6_addr *src,
2178 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002179 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180{
2181 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002182 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002183 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002184 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185
2186 if (prefix) { /* user wants prefix routes only */
2187 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2188 /* success since this is not a prefix route */
2189 return 1;
2190 }
2191 }
2192
Thomas Graf2d7202b2006-08-22 00:01:27 -07002193 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2194 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002195 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002196
2197 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198 rtm->rtm_family = AF_INET6;
2199 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2200 rtm->rtm_src_len = rt->rt6i_src.plen;
2201 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002202 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002203 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002204 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002205 table = RT6_TABLE_UNSPEC;
2206 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002207 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 if (rt->rt6i_flags&RTF_REJECT)
2209 rtm->rtm_type = RTN_UNREACHABLE;
2210 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2211 rtm->rtm_type = RTN_LOCAL;
2212 else
2213 rtm->rtm_type = RTN_UNICAST;
2214 rtm->rtm_flags = 0;
2215 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2216 rtm->rtm_protocol = rt->rt6i_protocol;
2217 if (rt->rt6i_flags&RTF_DYNAMIC)
2218 rtm->rtm_protocol = RTPROT_REDIRECT;
2219 else if (rt->rt6i_flags & RTF_ADDRCONF)
2220 rtm->rtm_protocol = RTPROT_KERNEL;
2221 else if (rt->rt6i_flags&RTF_DEFAULT)
2222 rtm->rtm_protocol = RTPROT_RA;
2223
2224 if (rt->rt6i_flags&RTF_CACHE)
2225 rtm->rtm_flags |= RTM_F_CLONED;
2226
2227 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002228 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002229 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002231 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232#ifdef CONFIG_IPV6_SUBTREES
2233 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002234 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002235 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002237 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002239 if (iif) {
2240#ifdef CONFIG_IPV6_MROUTE
2241 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002242 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002243 if (err <= 0) {
2244 if (!nowait) {
2245 if (err == 0)
2246 return 0;
2247 goto nla_put_failure;
2248 } else {
2249 if (err == -EMSGSIZE)
2250 goto nla_put_failure;
2251 }
2252 }
2253 } else
2254#endif
2255 NLA_PUT_U32(skb, RTA_IIF, iif);
2256 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002257 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002259 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002260 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002261 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002263
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 goto nla_put_failure;
2266
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002268 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2269
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002271 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2272
2273 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002274
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002275 if (!(rt->rt6i_flags & RTF_EXPIRES))
2276 expires = 0;
2277 else if (rt->rt6i_expires - jiffies < INT_MAX)
2278 expires = rt->rt6i_expires - jiffies;
2279 else
2280 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002281
Thomas Grafe3703b32006-11-27 09:27:07 -08002282 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2283 expires, rt->u.dst.error) < 0)
2284 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285
Thomas Graf2d7202b2006-08-22 00:01:27 -07002286 return nlmsg_end(skb, nlh);
2287
2288nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002289 nlmsg_cancel(skb, nlh);
2290 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291}
2292
Patrick McHardy1b43af52006-08-10 23:11:17 -07002293int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294{
2295 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2296 int prefix;
2297
Thomas Graf2d7202b2006-08-22 00:01:27 -07002298 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2299 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2301 } else
2302 prefix = 0;
2303
Brian Haley191cd582008-08-14 15:33:21 -07002304 return rt6_fill_node(arg->net,
2305 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002307 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308}
2309
Thomas Grafc127ea22007-03-22 11:58:32 -07002310static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002312 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002313 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002315 struct sk_buff *skb;
2316 struct rtmsg *rtm;
2317 struct flowi fl;
2318 int err, iif = 0;
2319
2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2321 if (err < 0)
2322 goto errout;
2323
2324 err = -EINVAL;
2325 memset(&fl, 0, sizeof(fl));
2326
2327 if (tb[RTA_SRC]) {
2328 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2329 goto errout;
2330
2331 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2332 }
2333
2334 if (tb[RTA_DST]) {
2335 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2336 goto errout;
2337
2338 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2339 }
2340
2341 if (tb[RTA_IIF])
2342 iif = nla_get_u32(tb[RTA_IIF]);
2343
2344 if (tb[RTA_OIF])
2345 fl.oif = nla_get_u32(tb[RTA_OIF]);
2346
2347 if (iif) {
2348 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002349 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002350 if (!dev) {
2351 err = -ENODEV;
2352 goto errout;
2353 }
2354 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355
2356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002357 if (skb == NULL) {
2358 err = -ENOBUFS;
2359 goto errout;
2360 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361
2362 /* Reserve room for dummy headers, this skb can pass
2363 through good chunk of routing engine.
2364 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002365 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2367
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002368 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Eric Dumazetadf30902009-06-02 05:19:30 +00002369 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370
Brian Haley191cd582008-08-14 15:33:21 -07002371 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002373 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002375 kfree_skb(skb);
2376 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 }
2378
Daniel Lezcano55786892008-03-04 13:47:47 -08002379 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002380errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382}
2383
Thomas Graf86872cb2006-08-22 00:01:08 -07002384void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385{
2386 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002387 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002388 u32 seq;
2389 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002391 err = -ENOBUFS;
2392 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002393
Thomas Graf339bf982006-11-10 14:10:15 -08002394 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002395 if (skb == NULL)
2396 goto errout;
2397
Brian Haley191cd582008-08-14 15:33:21 -07002398 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002399 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002400 if (err < 0) {
2401 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2402 WARN_ON(err == -EMSGSIZE);
2403 kfree_skb(skb);
2404 goto errout;
2405 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002406 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2407 info->nlh, gfp_any());
2408 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002409errout:
2410 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002411 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412}
2413
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002414static int ip6_route_dev_notify(struct notifier_block *this,
2415 unsigned long event, void *data)
2416{
2417 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002418 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002419
2420 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2421 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2422 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2423#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2424 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2425 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2426 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2427 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2428#endif
2429 }
2430
2431 return NOTIFY_OK;
2432}
2433
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434/*
2435 * /proc
2436 */
2437
2438#ifdef CONFIG_PROC_FS
2439
2440#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2441
/*
 * NOTE(review): nothing in this part of the file uses this structure; the
 * /proc handlers below work on a seq_file.  Presumably a leftover from an
 * older buffer-based /proc implementation - confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2450
2451static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2452{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002453 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456
2457#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002458 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002460 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461#endif
2462
2463 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002464 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002466 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002468 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2469 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2470 rt->u.dst.__use, rt->rt6i_flags,
2471 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472 return 0;
2473}
2474
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002475static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002477 struct net *net = (struct net *)m->private;
2478 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002479 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480}
2481
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002482static int ipv6_route_open(struct inode *inode, struct file *file)
2483{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002484 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002485}
2486
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002487static const struct file_operations ipv6_route_proc_fops = {
2488 .owner = THIS_MODULE,
2489 .open = ipv6_route_open,
2490 .read = seq_read,
2491 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002492 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002493};
2494
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2496{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002497 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002499 net->ipv6.rt6_stats->fib_nodes,
2500 net->ipv6.rt6_stats->fib_route_nodes,
2501 net->ipv6.rt6_stats->fib_rt_alloc,
2502 net->ipv6.rt6_stats->fib_rt_entries,
2503 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002504 atomic_read(&net->ipv6.ip6_dst_ops->entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002505 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506
2507 return 0;
2508}
2509
2510static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2511{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002512 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002513}
2514
Arjan van de Ven9a321442007-02-12 00:55:35 -08002515static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 .owner = THIS_MODULE,
2517 .open = rt6_stats_seq_open,
2518 .read = seq_read,
2519 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002520 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521};
2522#endif /* CONFIG_PROC_FS */
2523
2524#ifdef CONFIG_SYSCTL
2525
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526static
2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2528 void __user *buffer, size_t *lenp, loff_t *ppos)
2529{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002530 struct net *net = current->nsproxy->net_ns;
2531 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 if (write) {
2533 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 return 0;
2536 } else
2537 return -EINVAL;
2538}
2539
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002540ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002541 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002543 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002545 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002546 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 },
2548 {
2549 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2550 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002551 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 .maxlen = sizeof(int),
2553 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002554 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 },
2556 {
2557 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2558 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002559 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 .maxlen = sizeof(int),
2561 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002562 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 },
2564 {
2565 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2566 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002567 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 .maxlen = sizeof(int),
2569 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002570 .proc_handler = proc_dointvec_jiffies,
2571 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 },
2573 {
2574 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2575 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002576 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 .maxlen = sizeof(int),
2578 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002579 .proc_handler = proc_dointvec_jiffies,
2580 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 },
2582 {
2583 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2584 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002585 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 .maxlen = sizeof(int),
2587 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002588 .proc_handler = proc_dointvec_jiffies,
2589 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 },
2591 {
2592 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2593 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 .maxlen = sizeof(int),
2596 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002597 .proc_handler = proc_dointvec_jiffies,
2598 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 },
2600 {
2601 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2602 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002603 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 .maxlen = sizeof(int),
2605 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002606 .proc_handler = proc_dointvec_jiffies,
2607 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 },
2609 {
2610 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2611 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002612 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613 .maxlen = sizeof(int),
2614 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002615 .proc_handler = proc_dointvec_jiffies,
2616 .strategy = sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617 },
2618 {
2619 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2620 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002621 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622 .maxlen = sizeof(int),
2623 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002624 .proc_handler = proc_dointvec_ms_jiffies,
2625 .strategy = sysctl_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626 },
2627 { .ctl_name = 0 }
2628};
2629
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002630struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2631{
2632 struct ctl_table *table;
2633
2634 table = kmemdup(ipv6_route_table_template,
2635 sizeof(ipv6_route_table_template),
2636 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002637
2638 if (table) {
2639 table[0].data = &net->ipv6.sysctl.flush_delay;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002640 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002641 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2642 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2643 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2644 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2645 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2647 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2648 }
2649
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002650 return table;
2651}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652#endif
2653
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002654static int ip6_route_net_init(struct net *net)
2655{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002656 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002657
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002658 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2659 sizeof(*net->ipv6.ip6_dst_ops),
2660 GFP_KERNEL);
2661 if (!net->ipv6.ip6_dst_ops)
2662 goto out;
Denis V. Lunev48115be2008-04-16 02:01:34 -07002663 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002664
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002665 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2666 sizeof(*net->ipv6.ip6_null_entry),
2667 GFP_KERNEL);
2668 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002669 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002670 net->ipv6.ip6_null_entry->u.dst.path =
2671 (struct dst_entry *)net->ipv6.ip6_null_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002672 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002673
2674#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2675 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2676 sizeof(*net->ipv6.ip6_prohibit_entry),
2677 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002678 if (!net->ipv6.ip6_prohibit_entry)
2679 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002680 net->ipv6.ip6_prohibit_entry->u.dst.path =
2681 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002682 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002683
2684 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2685 sizeof(*net->ipv6.ip6_blk_hole_entry),
2686 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002687 if (!net->ipv6.ip6_blk_hole_entry)
2688 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002689 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2690 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002691 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002692#endif
2693
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002694 net->ipv6.sysctl.flush_delay = 0;
2695 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2696 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2697 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2698 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2699 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2700 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2701 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2702
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002703#ifdef CONFIG_PROC_FS
2704 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2705 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2706#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002707 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2708
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002709 ret = 0;
2710out:
2711 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002712
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002713#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2714out_ip6_prohibit_entry:
2715 kfree(net->ipv6.ip6_prohibit_entry);
2716out_ip6_null_entry:
2717 kfree(net->ipv6.ip6_null_entry);
2718#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002719out_ip6_dst_ops:
Denis V. Lunev48115be2008-04-16 02:01:34 -07002720 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002721 kfree(net->ipv6.ip6_dst_ops);
2722 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002723}
2724
2725static void ip6_route_net_exit(struct net *net)
2726{
2727#ifdef CONFIG_PROC_FS
2728 proc_net_remove(net, "ipv6_route");
2729 proc_net_remove(net, "rt6_stats");
2730#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002731 kfree(net->ipv6.ip6_null_entry);
2732#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 kfree(net->ipv6.ip6_prohibit_entry);
2734 kfree(net->ipv6.ip6_blk_hole_entry);
2735#endif
Denis V. Lunev48115be2008-04-16 02:01:34 -07002736 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002737 kfree(net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002738}
2739
2740static struct pernet_operations ip6_route_net_ops = {
2741 .init = ip6_route_net_init,
2742 .exit = ip6_route_net_exit,
2743};
2744
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002745static struct notifier_block ip6_route_dev_notifier = {
2746 .notifier_call = ip6_route_dev_notify,
2747 .priority = 0,
2748};
2749
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002750int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002752 int ret;
2753
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002754 ret = -ENOMEM;
2755 ip6_dst_ops_template.kmem_cachep =
2756 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2757 SLAB_HWCACHE_ALIGN, NULL);
2758 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002759 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002760
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002761 ret = register_pernet_subsys(&ip6_route_net_ops);
2762 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002763 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002764
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002765 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2766
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002767 /* Registering of the loopback is done before this portion of code,
2768 * the loopback reference in rt6_info will not be taken, do it
2769 * manually for init_net */
2770 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2771 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2772 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2773 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2774 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2775 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2776 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2777 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002778 ret = fib6_init();
2779 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002780 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002781
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002782 ret = xfrm6_init();
2783 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002784 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002785
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002786 ret = fib6_rules_init();
2787 if (ret)
2788 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002789
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002790 ret = -ENOBUFS;
2791 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2792 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2793 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2794 goto fib6_rules_init;
2795
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002796 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002797 if (ret)
2798 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002799
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002800out:
2801 return ret;
2802
2803fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002804 fib6_rules_cleanup();
2805xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002806 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002807out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002808 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002809out_register_subsys:
2810 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002811out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002812 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002813 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002814}
2815
2816void ip6_route_cleanup(void)
2817{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002818 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002819 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002820 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002821 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002823 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002824}