blob: 026caef0326caa90aee54a147d54014a20a0c039 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that they expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes through rt6_alloc_clone()
 * instead of returning them directly.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700111 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700124};
125
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800126static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 },
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700137 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and hands packets to ip6_pkt_prohibit()/ip6_pkt_prohibit_out().
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and uses dst_discard() for both input and output.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
180
/* Allocate a dst entry from @ops and return it viewed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
186
187static void ip6_dst_destroy(struct dst_entry *dst)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800191 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192
193 if (idev != NULL) {
194 rt->rt6i_idev = NULL;
195 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900196 }
David S. Millerb3419362010-11-30 12:27:11 -0800197 if (peer) {
198 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
199 rt->rt6i_peer = NULL;
200 inet_putpeer(peer);
201 }
202}
203
204void rt6_bind_peer(struct rt6_info *rt, int create)
205{
206 struct inet_peer *peer;
207
208 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
209 return;
210
211 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
212 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
213 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 int how)
218{
219 struct rt6_info *rt = (struct rt6_info *)dst;
220 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800221 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900222 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800224 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
225 struct inet6_dev *loopback_idev =
226 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 if (loopback_idev != NULL) {
228 rt->rt6i_idev = loopback_idev;
229 in6_dev_put(idev);
230 }
231 }
232}
233
234static __inline__ int rt6_check_expired(const struct rt6_info *rt)
235{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000236 return (rt->rt6i_flags & RTF_EXPIRES) &&
237 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238}
239
Thomas Grafc71099a2006-08-04 23:20:06 -0700240static inline int rt6_need_strict(struct in6_addr *daddr)
241{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000242 return ipv6_addr_type(daddr) &
243 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700244}
245
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700247 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 */
249
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800250static inline struct rt6_info *rt6_device_match(struct net *net,
251 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900252 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700254 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255{
256 struct rt6_info *local = NULL;
257 struct rt6_info *sprt;
258
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900259 if (!oif && ipv6_addr_any(saddr))
260 goto out;
261
Changli Gaod8d1f302010-06-10 23:31:35 -0700262 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900263 struct net_device *dev = sprt->rt6i_dev;
264
265 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 if (dev->ifindex == oif)
267 return sprt;
268 if (dev->flags & IFF_LOOPBACK) {
269 if (sprt->rt6i_idev == NULL ||
270 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700271 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900273 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 local->rt6i_idev->dev->ifindex == oif))
275 continue;
276 }
277 local = sprt;
278 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900279 } else {
280 if (ipv6_chk_addr(net, saddr, dev,
281 flags & RT6_LOOKUP_F_IFACE))
282 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900284 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900286 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 if (local)
288 return local;
289
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700290 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800291 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900293out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 return rt;
295}
296
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for the next hop of @rt.
 *
 * If the route has a next-hop neighbour whose state is not NUD_VALID and
 * the neighbour was last updated more than rtr_probe_interval ago, send
 * a Neighbour Solicitation to its solicited-node multicast address.
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * The interval test and the refresh of neigh->updated form one
	 * read-modify-write, so they need the write side of the lock.
	 * Under a shared (read) lock several CPUs could pass the test at
	 * once and each send a probe, defeating the rate limit.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation with the lock dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
331
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700335static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700338 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800339 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700340 if ((dev->flags & IFF_LOOPBACK) &&
341 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
342 return 1;
343 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344}
345
Dave Jonesb6f99a22007-03-22 12:27:49 -0700346static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800349 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700350 if (rt->rt6i_flags & RTF_NONEXTHOP ||
351 !(rt->rt6i_flags & RTF_GATEWAY))
352 m = 1;
353 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354 read_lock_bh(&neigh->lock);
355 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800357#ifdef CONFIG_IPV6_ROUTER_PREF
358 else if (neigh->nud_state & NUD_FAILED)
359 m = 0;
360#endif
361 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800362 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800364 } else
365 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return m;
367}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369static int rt6_score_route(struct rt6_info *rt, int oif,
370 int strict)
371{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700372 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900373
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700374 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700375 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800376 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800377#ifdef CONFIG_IPV6_ROUTER_PREF
378 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
379#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700380 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800381 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800382 return -1;
383 return m;
384}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385
David S. Millerf11e6652007-03-24 20:36:25 -0700386static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
387 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800388{
David S. Millerf11e6652007-03-24 20:36:25 -0700389 int m;
390
391 if (rt6_check_expired(rt))
392 goto out;
393
394 m = rt6_score_route(rt, oif, strict);
395 if (m < 0)
396 goto out;
397
398 if (m > *mpri) {
399 if (strict & RT6_LOOKUP_F_REACHABLE)
400 rt6_probe(match);
401 *mpri = m;
402 match = rt;
403 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
404 rt6_probe(rt);
405 }
406
407out:
408 return match;
409}
410
411static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
412 struct rt6_info *rr_head,
413 u32 metric, int oif, int strict)
414{
415 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800416 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Millerf11e6652007-03-24 20:36:25 -0700418 match = NULL;
419 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700420 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700421 match = find_match(rt, oif, strict, &mpri, match);
422 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700423 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700424 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800425
David S. Millerf11e6652007-03-24 20:36:25 -0700426 return match;
427}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800428
David S. Millerf11e6652007-03-24 20:36:25 -0700429static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
430{
431 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800432 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
David S. Millerf11e6652007-03-24 20:36:25 -0700434 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800435 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
David S. Millerf11e6652007-03-24 20:36:25 -0700437 rt0 = fn->rr_ptr;
438 if (!rt0)
439 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
David S. Millerf11e6652007-03-24 20:36:25 -0700441 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800443 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700444 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700445 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700446
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800447 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700448 if (!next || next->rt6i_metric != rt0->rt6i_metric)
449 next = fn->leaf;
450
451 if (next != rt0)
452 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 }
454
David S. Millerf11e6652007-03-24 20:36:25 -0700455 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800456 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900458 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000459 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460}
461
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 * @opt: start of the option, interpreted as struct route_info
 * @len: number of bytes available at @opt
 *
 * Validates the option, then adds, refreshes or deletes the matching
 * route: a zero lifetime removes an existing route, a non-zero lifetime
 * creates a missing one, and an existing one gets its preference and
 * expiry updated.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&masked,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
535
/*
 * BACKTRACK - climb the fib6 tree after a failed match.
 *
 * Expects the enclosing function to provide the locals "rt" and "fn"
 * and the labels "out" and "restart".  When rt is the namespace's null
 * entry, walk towards the root: jump to "out" on reaching the top-level
 * root, otherwise move to the parent (or look up @saddr in the parent's
 * subtree when it has one that is not the current node) and jump to
 * "restart" as soon as a node carrying route info is found.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700553
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800554static struct rt6_info *ip6_pol_route_lookup(struct net *net,
555 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700556 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557{
558 struct fib6_node *fn;
559 struct rt6_info *rt;
560
Thomas Grafc71099a2006-08-04 23:20:06 -0700561 read_lock_bh(&table->tb6_lock);
562 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
563restart:
564 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900565 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800566 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700567out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700568 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700570 return rt;
571
572}
573
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900574struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
575 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700576{
577 struct flowi fl = {
578 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000579 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700580 };
581 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700582 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700583
Thomas Grafadaa70b2006-10-13 15:01:03 -0700584 if (saddr) {
585 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
586 flags |= RT6_LOOKUP_F_HAS_SADDR;
587 }
588
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800589 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700590 if (dst->error == 0)
591 return (struct rt6_info *) dst;
592
593 dst_release(dst);
594
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 return NULL;
596}
597
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900598EXPORT_SYMBOL(rt6_lookup);
599
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
605
Thomas Graf86872cb2006-08-22 00:01:08 -0700606static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607{
608 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700609 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
Thomas Grafc71099a2006-08-04 23:20:06 -0700611 table = rt->rt6i_table;
612 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700613 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700614 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
616 return err;
617}
618
Thomas Graf40e22e82006-08-22 00:00:45 -0700619int ip6_ins_rt(struct rt6_info *rt)
620{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800621 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900622 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800623 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800624 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700625}
626
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800627static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
628 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 struct rt6_info *rt;
631
632 /*
633 * Clone the route.
634 */
635
636 rt = ip6_rt_copy(ort);
637
638 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800639 struct neighbour *neigh;
640 int attempts = !in_softirq();
641
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900642 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
643 if (rt->rt6i_dst.plen != 128 &&
644 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
645 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900647 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900649 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 rt->rt6i_dst.plen = 128;
651 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700652 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653
654#ifdef CONFIG_IPV6_SUBTREES
655 if (rt->rt6i_src.plen && saddr) {
656 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
657 rt->rt6i_src.plen = 128;
658 }
659#endif
660
David S. Miller14deae42009-01-04 16:04:39 -0800661 retry:
662 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
663 if (IS_ERR(neigh)) {
664 struct net *net = dev_net(rt->rt6i_dev);
665 int saved_rt_min_interval =
666 net->ipv6.sysctl.ip6_rt_gc_min_interval;
667 int saved_rt_elasticity =
668 net->ipv6.sysctl.ip6_rt_gc_elasticity;
669
670 if (attempts-- > 0) {
671 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
673
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000674 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800675
676 net->ipv6.sysctl.ip6_rt_gc_elasticity =
677 saved_rt_elasticity;
678 net->ipv6.sysctl.ip6_rt_gc_min_interval =
679 saved_rt_min_interval;
680 goto retry;
681 }
682
683 if (net_ratelimit())
684 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700685 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700686 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800687 return NULL;
688 }
689 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800691 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800693 return rt;
694}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800696static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
697{
698 struct rt6_info *rt = ip6_rt_copy(ort);
699 if (rt) {
700 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
701 rt->rt6i_dst.plen = 128;
702 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700703 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800704 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
705 }
706 return rt;
707}
708
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800709static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
710 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711{
712 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800713 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700714 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800716 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700717 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700719 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700722 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800724restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700725 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
727restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700728 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800729
730 BACKTRACK(net, &fl->fl6_src);
731 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800732 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800733 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
Changli Gaod8d1f302010-06-10 23:31:35 -0700735 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700736 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800737
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800738 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800739 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800740 else {
741#if CLONE_OFFLINK_ROUTE
742 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
743#else
744 goto out2;
745#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800747
Changli Gaod8d1f302010-06-10 23:31:35 -0700748 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800749 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800750
Changli Gaod8d1f302010-06-10 23:31:35 -0700751 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800752 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700753 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800754 if (!err)
755 goto out2;
756 }
757
758 if (--attempts <= 0)
759 goto out2;
760
761 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700762 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800763 * released someone could insert this route. Relookup.
764 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700765 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800766 goto relookup;
767
768out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800769 if (reachable) {
770 reachable = 0;
771 goto restart_2;
772 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700773 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700774 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700776 rt->dst.lastuse = jiffies;
777 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700778
779 return rt;
780}
781
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800782static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700783 struct flowi *fl, int flags)
784{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800785 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700786}
787
Thomas Grafc71099a2006-08-04 23:20:06 -0700788void ip6_route_input(struct sk_buff *skb)
789{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700790 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900791 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700792 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700793 struct flowi fl = {
794 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000795 .fl6_dst = iph->daddr,
796 .fl6_src = iph->saddr,
797 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900798 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700799 .proto = iph->nexthdr,
800 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700801
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800802 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700803 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700804
Eric Dumazetadf30902009-06-02 05:19:30 +0000805 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700806}
807
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800808static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700809 struct flowi *fl, int flags)
810{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800811 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700812}
813
Daniel Lezcano4591db42008-03-05 10:48:10 -0800814struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
815 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700816{
817 int flags = 0;
818
Brian Haley6057fd72010-05-28 23:02:35 -0700819 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700820 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700821
Thomas Grafadaa70b2006-10-13 15:01:03 -0700822 if (!ipv6_addr_any(&fl->fl6_src))
823 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000824 else if (sk)
825 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700826
Daniel Lezcano4591db42008-03-05 10:48:10 -0800827 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828}
829
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900830EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831
David S. Miller14e50e52007-05-24 18:17:54 -0700832int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
833{
834 struct rt6_info *ort = (struct rt6_info *) *dstp;
835 struct rt6_info *rt = (struct rt6_info *)
836 dst_alloc(&ip6_dst_blackhole_ops);
837 struct dst_entry *new = NULL;
838
839 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700840 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700841
842 atomic_set(&new->__refcnt, 1);
843 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800844 new->input = dst_discard;
845 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700846
Changli Gaod8d1f302010-06-10 23:31:35 -0700847 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
848 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700849 if (new->dev)
850 dev_hold(new->dev);
851 rt->rt6i_idev = ort->rt6i_idev;
852 if (rt->rt6i_idev)
853 in6_dev_hold(rt->rt6i_idev);
854 rt->rt6i_expires = 0;
855
856 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
857 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
858 rt->rt6i_metric = 0;
859
860 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
861#ifdef CONFIG_IPV6_SUBTREES
862 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
863#endif
864
865 dst_free(new);
866 }
867
868 dst_release(*dstp);
869 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000870 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700871}
872EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
873
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874/*
875 * Destination cache support functions
876 */
877
878static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
879{
880 struct rt6_info *rt;
881
882 rt = (struct rt6_info *) dst;
883
Herbert Xu10414442010-03-18 23:00:22 +0000884 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 return dst;
886
887 return NULL;
888}
889
890static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
891{
892 struct rt6_info *rt = (struct rt6_info *) dst;
893
894 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000895 if (rt->rt6i_flags & RTF_CACHE) {
896 if (rt6_check_expired(rt)) {
897 ip6_del_rt(rt);
898 dst = NULL;
899 }
900 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000902 dst = NULL;
903 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000905 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913
Eric Dumazetadf30902009-06-02 05:19:30 +0000914 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700917 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937}
938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939static int ipv6_get_mtu(struct net_device *dev);
940
Daniel Lezcano55786892008-03-04 13:47:47 -0800941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
Daniel Lezcano55786892008-03-04 13:47:47 -0800945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947
948 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800959static struct dst_entry *icmp6_dst_gc_list;
960static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700961
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900964 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900968 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969
970 if (unlikely(idev == NULL))
971 return NULL;
972
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800982 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -0700991 atomic_set(&rt->dst.__refcnt, 1);
992 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
994 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
995 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -0700998 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900999 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001005 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001006 rt->dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001008 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
Daniel Lezcano55786892008-03-04 13:47:47 -08001010 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001013 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014}
1015
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001016int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017{
1018 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001019 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
1021 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001022
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 } else {
1031 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001032 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 }
1034 }
1035
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001036 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001037
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001038 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039}
1040
David S. Miller1e493d12008-09-10 17:27:15 -07001041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
Daniel Lezcano569d3642008-01-18 03:56:57 -08001060static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001069 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
Eric Dumazetfc66f952010-10-08 06:37:34 +00001071 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001072 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001073 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074 goto out;
1075
Benjamin Thery6891a342008-03-04 13:49:47 -08001076 net->ipv6.ip6_rt_gc_expire++;
1077 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1078 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001079 entries = dst_entries_get_slow(ops);
1080 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001081 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001083 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001084 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085}
1086
1087/* Clean host part of a prefix. Not necessary in radix tree,
1088 but results in cleaner routing tables.
1089
1090 Remove it only when all the things will work!
1091 */
1092
1093static int ipv6_get_mtu(struct net_device *dev)
1094{
1095 int mtu = IPV6_MIN_MTU;
1096 struct inet6_dev *idev;
1097
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001098 rcu_read_lock();
1099 idev = __in6_dev_get(dev);
1100 if (idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 mtu = idev->cnf.mtu6;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001102 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 return mtu;
1104}
1105
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001106int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001108 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1109 if (hoplimit < 0) {
1110 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001111 struct inet6_dev *idev;
1112
1113 rcu_read_lock();
1114 idev = __in6_dev_get(dev);
1115 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001116 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001117 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001119 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 }
1121 return hoplimit;
1122}
1123
1124/*
1125 *
1126 */
1127
Thomas Graf86872cb2006-08-22 00:01:08 -07001128int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129{
1130 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001131 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 struct rt6_info *rt = NULL;
1133 struct net_device *dev = NULL;
1134 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001135 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 int addr_type;
1137
Thomas Graf86872cb2006-08-22 00:01:08 -07001138 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 return -EINVAL;
1140#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001141 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 return -EINVAL;
1143#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001144 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001146 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 if (!dev)
1148 goto out;
1149 idev = in6_dev_get(dev);
1150 if (!idev)
1151 goto out;
1152 }
1153
Thomas Graf86872cb2006-08-22 00:01:08 -07001154 if (cfg->fc_metric == 0)
1155 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
Daniel Lezcano55786892008-03-04 13:47:47 -08001157 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001158 if (table == NULL) {
1159 err = -ENOBUFS;
1160 goto out;
1161 }
1162
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001163 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165 if (rt == NULL) {
1166 err = -ENOMEM;
1167 goto out;
1168 }
1169
Changli Gaod8d1f302010-06-10 23:31:35 -07001170 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001171 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1172 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1173 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
Thomas Graf86872cb2006-08-22 00:01:08 -07001175 if (cfg->fc_protocol == RTPROT_UNSPEC)
1176 cfg->fc_protocol = RTPROT_BOOT;
1177 rt->rt6i_protocol = cfg->fc_protocol;
1178
1179 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
1181 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001182 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001183 else if (cfg->fc_flags & RTF_LOCAL)
1184 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001186 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
Changli Gaod8d1f302010-06-10 23:31:35 -07001188 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189
Thomas Graf86872cb2006-08-22 00:01:08 -07001190 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1191 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001193 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194
1195#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001196 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1197 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198#endif
1199
Thomas Graf86872cb2006-08-22 00:01:08 -07001200 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201
1202 /* We cannot add true routes via loopback here,
1203 they would result in kernel looping; promote them to reject routes
1204 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001205 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001206 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1207 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001209 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 if (dev) {
1211 dev_put(dev);
1212 in6_dev_put(idev);
1213 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001214 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 dev_hold(dev);
1216 idev = in6_dev_get(dev);
1217 if (!idev) {
1218 err = -ENODEV;
1219 goto out;
1220 }
1221 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001222 rt->dst.output = ip6_pkt_discard_out;
1223 rt->dst.input = ip6_pkt_discard;
1224 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1226 goto install_route;
1227 }
1228
Thomas Graf86872cb2006-08-22 00:01:08 -07001229 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 struct in6_addr *gw_addr;
1231 int gwa_type;
1232
Thomas Graf86872cb2006-08-22 00:01:08 -07001233 gw_addr = &cfg->fc_gateway;
1234 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 gwa_type = ipv6_addr_type(gw_addr);
1236
1237 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1238 struct rt6_info *grt;
1239
1240 /* IPv6 strictly inhibits using not link-local
1241 addresses as nexthop address.
1242 Otherwise, router will not able to send redirects.
1243 It is very good, but in some (rare!) circumstances
1244 (SIT, PtP, NBMA NOARP links) it is handy to allow
1245 some exceptions. --ANK
1246 */
1247 err = -EINVAL;
1248 if (!(gwa_type&IPV6_ADDR_UNICAST))
1249 goto out;
1250
Daniel Lezcano55786892008-03-04 13:47:47 -08001251 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252
1253 err = -EHOSTUNREACH;
1254 if (grt == NULL)
1255 goto out;
1256 if (dev) {
1257 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001258 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 goto out;
1260 }
1261 } else {
1262 dev = grt->rt6i_dev;
1263 idev = grt->rt6i_idev;
1264 dev_hold(dev);
1265 in6_dev_hold(grt->rt6i_idev);
1266 }
1267 if (!(grt->rt6i_flags&RTF_GATEWAY))
1268 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001269 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
1271 if (err)
1272 goto out;
1273 }
1274 err = -EINVAL;
1275 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1276 goto out;
1277 }
1278
1279 err = -ENODEV;
1280 if (dev == NULL)
1281 goto out;
1282
Thomas Graf86872cb2006-08-22 00:01:08 -07001283 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1285 if (IS_ERR(rt->rt6i_nexthop)) {
1286 err = PTR_ERR(rt->rt6i_nexthop);
1287 rt->rt6i_nexthop = NULL;
1288 goto out;
1289 }
1290 }
1291
Thomas Graf86872cb2006-08-22 00:01:08 -07001292 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293
1294install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001295 if (cfg->fc_mx) {
1296 struct nlattr *nla;
1297 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298
Thomas Graf86872cb2006-08-22 00:01:08 -07001299 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001300 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001301
1302 if (type) {
1303 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 err = -EINVAL;
1305 goto out;
1306 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001307
Changli Gaod8d1f302010-06-10 23:31:35 -07001308 rt->dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 }
1311 }
1312
Changli Gaod8d1f302010-06-10 23:31:35 -07001313 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1314 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1315 if (!dst_mtu(&rt->dst))
1316 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1317 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1318 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1319 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001321 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001322
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001323 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001324
Thomas Graf86872cb2006-08-22 00:01:08 -07001325 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
1327out:
1328 if (dev)
1329 dev_put(dev);
1330 if (idev)
1331 in6_dev_put(idev);
1332 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001333 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 return err;
1335}
1336
Thomas Graf86872cb2006-08-22 00:01:08 -07001337static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338{
1339 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001340 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001341 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001343 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001344 return -ENOENT;
1345
Thomas Grafc71099a2006-08-04 23:20:06 -07001346 table = rt->rt6i_table;
1347 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348
Thomas Graf86872cb2006-08-22 00:01:08 -07001349 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001350 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Thomas Grafc71099a2006-08-04 23:20:06 -07001352 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353
1354 return err;
1355}
1356
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001357int ip6_del_rt(struct rt6_info *rt)
1358{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001359 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001360 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001361 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001362 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001363}
1364
Thomas Graf86872cb2006-08-22 00:01:08 -07001365static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366{
Thomas Grafc71099a2006-08-04 23:20:06 -07001367 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 struct fib6_node *fn;
1369 struct rt6_info *rt;
1370 int err = -ESRCH;
1371
Daniel Lezcano55786892008-03-04 13:47:47 -08001372 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001373 if (table == NULL)
1374 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
Thomas Grafc71099a2006-08-04 23:20:06 -07001376 read_lock_bh(&table->tb6_lock);
1377
1378 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001379 &cfg->fc_dst, cfg->fc_dst_len,
1380 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001381
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001383 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001384 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001388 if (cfg->fc_flags & RTF_GATEWAY &&
1389 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001391 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001393 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001394 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
Thomas Graf86872cb2006-08-22 00:01:08 -07001396 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 }
1398 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001399 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400
1401 return err;
1402}
1403
1404/*
1405 * Handle redirects
1406 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001407struct ip6rd_flowi {
1408 struct flowi fl;
1409 struct in6_addr gateway;
1410};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001412static struct rt6_info *__ip6_route_redirect(struct net *net,
1413 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001414 struct flowi *fl,
1415 int flags)
1416{
1417 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1418 struct rt6_info *rt;
1419 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001420
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001422 * Get the "current" route for this destination and
1423 * check if the redirect has come from approriate router.
1424 *
1425 * RFC 2461 specifies that redirects should only be
1426 * accepted if they come from the nexthop to the target.
1427 * Due to the way the routes are chosen, this notion
1428 * is a bit fuzzy and one might need to check all possible
1429 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431
Thomas Grafc71099a2006-08-04 23:20:06 -07001432 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001433 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001434restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001435 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001436 /*
1437 * Current route is on-link; redirect is always invalid.
1438 *
1439 * Seems, previous statement is not true. It could
1440 * be node, which looks for us as on-link (f.e. proxy ndisc)
1441 * But then router serving it might decide, that we should
1442 * know truth 8)8) --ANK (980726).
1443 */
1444 if (rt6_check_expired(rt))
1445 continue;
1446 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001448 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001449 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001450 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001451 continue;
1452 break;
1453 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001454
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001455 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001456 rt = net->ipv6.ip6_null_entry;
1457 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001458out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001459 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001460
1461 read_unlock_bh(&table->tb6_lock);
1462
1463 return rt;
1464};
1465
1466static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1467 struct in6_addr *src,
1468 struct in6_addr *gateway,
1469 struct net_device *dev)
1470{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001471 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001472 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001473 struct ip6rd_flowi rdfl = {
1474 .fl = {
1475 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001476 .fl6_dst = *dest,
1477 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001478 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001480
Brian Haley86c36ce2009-10-07 13:58:01 -07001481 ipv6_addr_copy(&rdfl.gateway, gateway);
1482
Thomas Grafadaa70b2006-10-13 15:01:03 -07001483 if (rt6_need_strict(dest))
1484 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001485
Daniel Lezcano55786892008-03-04 13:47:47 -08001486 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001487 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001488}
1489
1490void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1491 struct in6_addr *saddr,
1492 struct neighbour *neigh, u8 *lladdr, int on_link)
1493{
1494 struct rt6_info *rt, *nrt = NULL;
1495 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001496 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001497
1498 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1499
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001500 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 if (net_ratelimit())
1502 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1503 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001504 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 }
1506
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 /*
1508 * We have finally decided to accept it.
1509 */
1510
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001511 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1513 NEIGH_UPDATE_F_OVERRIDE|
1514 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1515 NEIGH_UPDATE_F_ISROUTER))
1516 );
1517
1518 /*
1519 * Redirect received -> path was valid.
1520 * Look, redirects are sent only in response to data packets,
1521 * so that this nexthop apparently is reachable. --ANK
1522 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001523 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524
1525 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001526 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 goto out;
1528
1529 nrt = ip6_rt_copy(rt);
1530 if (nrt == NULL)
1531 goto out;
1532
1533 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1534 if (on_link)
1535 nrt->rt6i_flags &= ~RTF_GATEWAY;
1536
1537 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1538 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001539 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540
1541 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1542 nrt->rt6i_nexthop = neigh_clone(neigh);
1543 /* Reset pmtu, it may be better */
Changli Gaod8d1f302010-06-10 23:31:35 -07001544 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1545 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1546 dst_mtu(&nrt->dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547
Thomas Graf40e22e82006-08-22 00:00:45 -07001548 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 goto out;
1550
Changli Gaod8d1f302010-06-10 23:31:35 -07001551 netevent.old = &rt->dst;
1552 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001553 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1554
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001556 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 return;
1558 }
1559
1560out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001561 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562}
1563
1564/*
1565 * Handle ICMP "packet too big" messages
1566 * i.e. Path MTU discovery
1567 */
1568
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001569static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1570 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571{
1572 struct rt6_info *rt, *nrt;
1573 int allfrag = 0;
1574
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001575 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 if (rt == NULL)
1577 return;
1578
Changli Gaod8d1f302010-06-10 23:31:35 -07001579 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 goto out;
1581
1582 if (pmtu < IPV6_MIN_MTU) {
1583 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001584 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 * MTU (1280) and a fragment header should always be included
1586 * after a node receiving Too Big message reporting PMTU is
1587 * less than the IPv6 Minimum Link MTU.
1588 */
1589 pmtu = IPV6_MIN_MTU;
1590 allfrag = 1;
1591 }
1592
1593 /* New mtu received -> path was valid.
1594 They are sent only in response to data packets,
1595 so that this nexthop apparently is reachable. --ANK
1596 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001597 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598
1599 /* Host route. If it is static, it would be better
1600 not to override it, but add new one, so that
1601 when cache entry will expire old pmtu
1602 would return automatically.
1603 */
1604 if (rt->rt6i_flags & RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001605 rt->dst.metrics[RTAX_MTU-1] = pmtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001607 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1608 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1610 goto out;
1611 }
1612
1613 /* Network route.
1614 Two cases are possible:
1615 1. It is connected route. Action: COW
1616 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1617 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001618 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001619 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001620 else
1621 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001622
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001623 if (nrt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001624 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001625 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001626 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001627
1628 /* According to RFC 1981, detecting PMTU increase shouldn't be
1629 * happened within 5 mins, the recommended timer is 10 mins.
1630 * Here this route expiration time is set to ip6_rt_mtu_expires
1631 * which is 10 mins. After 10 mins the decreased pmtu is expired
1632 * and detecting PMTU increase will be automatically happened.
1633 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001634 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001635 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1636
Thomas Graf40e22e82006-08-22 00:00:45 -07001637 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001640 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641}
1642
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001643void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1644 struct net_device *dev, u32 pmtu)
1645{
1646 struct net *net = dev_net(dev);
1647
1648 /*
1649 * RFC 1981 states that a node "MUST reduce the size of the packets it
1650 * is sending along the path" that caused the Packet Too Big message.
1651 * Since it's not possible in the general case to determine which
1652 * interface was used to send the original packet, we update the MTU
1653 * on the interface that will be used to send future packets. We also
1654 * update the MTU on the interface that received the Packet Too Big in
1655 * case the original packet was forced out that interface with
1656 * SO_BINDTODEVICE or similar. This is the next best thing to the
1657 * correct behaviour, which would be to update the MTU on all
1658 * interfaces.
1659 */
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1661 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1662}
1663
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664/*
1665 * Misc support functions
1666 */
1667
1668static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1669{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001670 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001671 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672
1673 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001674 rt->dst.input = ort->dst.input;
1675 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
Changli Gaod8d1f302010-06-10 23:31:35 -07001677 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1678 rt->dst.error = ort->dst.error;
1679 rt->dst.dev = ort->dst.dev;
1680 if (rt->dst.dev)
1681 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 rt->rt6i_idev = ort->rt6i_idev;
1683 if (rt->rt6i_idev)
1684 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001685 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 rt->rt6i_expires = 0;
1687
1688 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1689 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1690 rt->rt6i_metric = 0;
1691
1692 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1693#ifdef CONFIG_IPV6_SUBTREES
1694 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1695#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001696 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 }
1698 return rt;
1699}
1700
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001701#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001702static struct rt6_info *rt6_get_route_info(struct net *net,
1703 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001704 struct in6_addr *gwaddr, int ifindex)
1705{
1706 struct fib6_node *fn;
1707 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001708 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001709
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001710 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001711 if (table == NULL)
1712 return NULL;
1713
1714 write_lock_bh(&table->tb6_lock);
1715 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001716 if (!fn)
1717 goto out;
1718
Changli Gaod8d1f302010-06-10 23:31:35 -07001719 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001720 if (rt->rt6i_dev->ifindex != ifindex)
1721 continue;
1722 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1723 continue;
1724 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1725 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001726 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001727 break;
1728 }
1729out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001730 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001731 return rt;
1732}
1733
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001734static struct rt6_info *rt6_add_route_info(struct net *net,
1735 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001736 struct in6_addr *gwaddr, int ifindex,
1737 unsigned pref)
1738{
Thomas Graf86872cb2006-08-22 00:01:08 -07001739 struct fib6_config cfg = {
1740 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001741 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001742 .fc_ifindex = ifindex,
1743 .fc_dst_len = prefixlen,
1744 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1745 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001746 .fc_nlinfo.pid = 0,
1747 .fc_nlinfo.nlh = NULL,
1748 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001749 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001750
Thomas Graf86872cb2006-08-22 00:01:08 -07001751 ipv6_addr_copy(&cfg.fc_dst, prefix);
1752 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1753
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001754 /* We should treat it as a default route if prefix length is 0. */
1755 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001756 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001757
Thomas Graf86872cb2006-08-22 00:01:08 -07001758 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001759
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001760 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001761}
1762#endif
1763
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001765{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001767 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001769 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001770 if (table == NULL)
1771 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
Thomas Grafc71099a2006-08-04 23:20:06 -07001773 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001774 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001776 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1778 break;
1779 }
1780 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001781 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001782 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 return rt;
1784}
1785
1786struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001787 struct net_device *dev,
1788 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789{
Thomas Graf86872cb2006-08-22 00:01:08 -07001790 struct fib6_config cfg = {
1791 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001792 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001793 .fc_ifindex = dev->ifindex,
1794 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1795 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001796 .fc_nlinfo.pid = 0,
1797 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001798 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001799 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800
Thomas Graf86872cb2006-08-22 00:01:08 -07001801 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Thomas Graf86872cb2006-08-22 00:01:08 -07001803 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 return rt6_get_dflt_router(gwaddr, dev);
1806}
1807
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001808void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809{
1810 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001811 struct fib6_table *table;
1812
1813 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001814 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001815 if (table == NULL)
1816 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817
1818restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001819 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001820 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001822 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001823 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001824 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 goto restart;
1826 }
1827 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001828 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829}
1830
Daniel Lezcano55786892008-03-04 13:47:47 -08001831static void rtmsg_to_fib6_config(struct net *net,
1832 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001833 struct fib6_config *cfg)
1834{
1835 memset(cfg, 0, sizeof(*cfg));
1836
1837 cfg->fc_table = RT6_TABLE_MAIN;
1838 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1839 cfg->fc_metric = rtmsg->rtmsg_metric;
1840 cfg->fc_expires = rtmsg->rtmsg_info;
1841 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1842 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1843 cfg->fc_flags = rtmsg->rtmsg_flags;
1844
Daniel Lezcano55786892008-03-04 13:47:47 -08001845 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001846
Thomas Graf86872cb2006-08-22 00:01:08 -07001847 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1848 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1849 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1850}
1851
Daniel Lezcano55786892008-03-04 13:47:47 -08001852int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853{
Thomas Graf86872cb2006-08-22 00:01:08 -07001854 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 struct in6_rtmsg rtmsg;
1856 int err;
1857
1858 switch(cmd) {
1859 case SIOCADDRT: /* Add a route */
1860 case SIOCDELRT: /* Delete a route */
1861 if (!capable(CAP_NET_ADMIN))
1862 return -EPERM;
1863 err = copy_from_user(&rtmsg, arg,
1864 sizeof(struct in6_rtmsg));
1865 if (err)
1866 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001867
Daniel Lezcano55786892008-03-04 13:47:47 -08001868 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001869
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 rtnl_lock();
1871 switch (cmd) {
1872 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001873 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874 break;
1875 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001876 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 break;
1878 default:
1879 err = -EINVAL;
1880 }
1881 rtnl_unlock();
1882
1883 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001884 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
1886 return -EINVAL;
1887}
1888
1889/*
1890 * Drop the packet on the floor
1891 */
1892
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001893static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001895 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001896 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001897 switch (ipstats_mib_noroutes) {
1898 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001899 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001900 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001901 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1902 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001903 break;
1904 }
1905 /* FALLTHROUGH */
1906 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001907 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1908 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001909 break;
1910 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001911 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 kfree_skb(skb);
1913 return 0;
1914}
1915
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001916static int ip6_pkt_discard(struct sk_buff *skb)
1917{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001918 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001919}
1920
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001921static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922{
Eric Dumazetadf30902009-06-02 05:19:30 +00001923 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001924 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925}
1926
David S. Miller6723ab52006-10-18 21:20:57 -07001927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1928
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001929static int ip6_pkt_prohibit(struct sk_buff *skb)
1930{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001931 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001932}
1933
1934static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1935{
Eric Dumazetadf30902009-06-02 05:19:30 +00001936 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001937 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001938}
1939
David S. Miller6723ab52006-10-18 21:20:57 -07001940#endif
1941
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942/*
1943 * Allocate a dst for local (unicast / anycast) address.
1944 */
1945
1946struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1947 const struct in6_addr *addr,
1948 int anycast)
1949{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001950 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001951 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001952 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Ben Greear40385652010-11-08 12:33:48 +00001954 if (rt == NULL) {
1955 if (net_ratelimit())
1956 pr_warning("IPv6: Maximum number of routes reached,"
1957 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001959 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
Daniel Lezcano55786892008-03-04 13:47:47 -08001961 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 in6_dev_hold(idev);
1963
Changli Gaod8d1f302010-06-10 23:31:35 -07001964 rt->dst.flags = DST_HOST;
1965 rt->dst.input = ip6_input;
1966 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001967 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07001969 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1970 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1971 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1972 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
1974 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001975 if (anycast)
1976 rt->rt6i_flags |= RTF_ANYCAST;
1977 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001979 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1980 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001981 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08001982
1983 /* We are casting this because that is the return
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1987 */
1988 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989 }
David S. Miller14deae42009-01-04 16:04:39 -08001990 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991
1992 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1993 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001994 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
Changli Gaod8d1f302010-06-10 23:31:35 -07001996 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
1998 return rt;
1999}
2000
/* Argument bundle passed through the route walkers to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace whose null entry is spared */
};
2005
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006static int fib6_ifdown(struct rt6_info *rt, void *arg)
2007{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002008 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2009 struct net *net = ((struct arg_dev_net *)arg)->net;
2010
2011 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2012 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 RT6_TRACE("deleted by ifdown %p\n", rt);
2014 return -1;
2015 }
2016 return 0;
2017}
2018
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002019void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002021 struct arg_dev_net adn = {
2022 .dev = dev,
2023 .net = net,
2024 };
2025
2026 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002027 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028}
2029
/* Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
2035
2036static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2037{
2038 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2039 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002040 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041
2042 /* In IPv6 pmtu discovery is not optional,
2043 so that RTAX_MTU lock cannot disable it.
2044 We still use this lock to block changes
2045 caused by addrconf/ndisc.
2046 */
2047
2048 idev = __in6_dev_get(arg->dev);
2049 if (idev == NULL)
2050 return 0;
2051
2052 /* For administrative MTU increase, there is no way to discover
2053 IPv6 PMTU increase, so PMTU increase should be updated here.
2054 Since RFC 1981 doesn't include administrative MTU increase
2055 update PMTU increase is a MUST. (i.e. jumbo frame)
2056 */
2057 /*
2058 If new MTU is less than route PMTU, this new MTU will be the
2059 lowest MTU in the path, update the route PMTU to reflect PMTU
2060 decreases; if new MTU is greater than route PMTU, and the
2061 old MTU is the lowest MTU in the path, update the route PMTU
2062 to reflect the increase. In this case if the other nodes' MTU
2063 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2064 PMTU discouvery.
2065 */
2066 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002067 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2068 (dst_mtu(&rt->dst) >= arg->mtu ||
2069 (dst_mtu(&rt->dst) < arg->mtu &&
2070 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2071 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2072 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002073 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 return 0;
2075}
2076
2077void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2078{
Thomas Grafc71099a2006-08-04 23:20:06 -07002079 struct rt6_mtu_change_arg arg = {
2080 .dev = dev,
2081 .mtu = mtu,
2082 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002084 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085}
2086
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002087static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002088 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002089 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002090 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002091 [RTA_PRIORITY] = { .type = NLA_U32 },
2092 [RTA_METRICS] = { .type = NLA_NESTED },
2093};
2094
2095static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2096 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097{
Thomas Graf86872cb2006-08-22 00:01:08 -07002098 struct rtmsg *rtm;
2099 struct nlattr *tb[RTA_MAX+1];
2100 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002101
Thomas Graf86872cb2006-08-22 00:01:08 -07002102 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2103 if (err < 0)
2104 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105
Thomas Graf86872cb2006-08-22 00:01:08 -07002106 err = -EINVAL;
2107 rtm = nlmsg_data(nlh);
2108 memset(cfg, 0, sizeof(*cfg));
2109
2110 cfg->fc_table = rtm->rtm_table;
2111 cfg->fc_dst_len = rtm->rtm_dst_len;
2112 cfg->fc_src_len = rtm->rtm_src_len;
2113 cfg->fc_flags = RTF_UP;
2114 cfg->fc_protocol = rtm->rtm_protocol;
2115
2116 if (rtm->rtm_type == RTN_UNREACHABLE)
2117 cfg->fc_flags |= RTF_REJECT;
2118
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002119 if (rtm->rtm_type == RTN_LOCAL)
2120 cfg->fc_flags |= RTF_LOCAL;
2121
Thomas Graf86872cb2006-08-22 00:01:08 -07002122 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2123 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002124 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002125
2126 if (tb[RTA_GATEWAY]) {
2127 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2128 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002130
2131 if (tb[RTA_DST]) {
2132 int plen = (rtm->rtm_dst_len + 7) >> 3;
2133
2134 if (nla_len(tb[RTA_DST]) < plen)
2135 goto errout;
2136
2137 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002139
2140 if (tb[RTA_SRC]) {
2141 int plen = (rtm->rtm_src_len + 7) >> 3;
2142
2143 if (nla_len(tb[RTA_SRC]) < plen)
2144 goto errout;
2145
2146 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002148
2149 if (tb[RTA_OIF])
2150 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2151
2152 if (tb[RTA_PRIORITY])
2153 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2154
2155 if (tb[RTA_METRICS]) {
2156 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2157 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002159
2160 if (tb[RTA_TABLE])
2161 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2162
2163 err = 0;
2164errout:
2165 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166}
2167
Thomas Grafc127ea22007-03-22 11:58:32 -07002168static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169{
Thomas Graf86872cb2006-08-22 00:01:08 -07002170 struct fib6_config cfg;
2171 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172
Thomas Graf86872cb2006-08-22 00:01:08 -07002173 err = rtm_to_fib6_config(skb, nlh, &cfg);
2174 if (err < 0)
2175 return err;
2176
2177 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178}
2179
Thomas Grafc127ea22007-03-22 11:58:32 -07002180static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181{
Thomas Graf86872cb2006-08-22 00:01:08 -07002182 struct fib6_config cfg;
2183 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184
Thomas Graf86872cb2006-08-22 00:01:08 -07002185 err = rtm_to_fib6_config(skb, nlh, &cfg);
2186 if (err < 0)
2187 return err;
2188
2189 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190}
2191
Thomas Graf339bf982006-11-10 14:10:15 -08002192static inline size_t rt6_nlmsg_size(void)
2193{
2194 return NLMSG_ALIGN(sizeof(struct rtmsg))
2195 + nla_total_size(16) /* RTA_SRC */
2196 + nla_total_size(16) /* RTA_DST */
2197 + nla_total_size(16) /* RTA_GATEWAY */
2198 + nla_total_size(16) /* RTA_PREFSRC */
2199 + nla_total_size(4) /* RTA_TABLE */
2200 + nla_total_size(4) /* RTA_IIF */
2201 + nla_total_size(4) /* RTA_OIF */
2202 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002203 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002204 + nla_total_size(sizeof(struct rta_cacheinfo));
2205}
2206
Brian Haley191cd582008-08-14 15:33:21 -07002207static int rt6_fill_node(struct net *net,
2208 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002209 struct in6_addr *dst, struct in6_addr *src,
2210 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002211 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212{
2213 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002214 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002215 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002216 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217
2218 if (prefix) { /* user wants prefix routes only */
2219 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2220 /* success since this is not a prefix route */
2221 return 1;
2222 }
2223 }
2224
Thomas Graf2d7202b2006-08-22 00:01:27 -07002225 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2226 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002227 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002228
2229 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 rtm->rtm_family = AF_INET6;
2231 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2232 rtm->rtm_src_len = rt->rt6i_src.plen;
2233 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002234 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002235 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002236 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002237 table = RT6_TABLE_UNSPEC;
2238 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002239 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 if (rt->rt6i_flags&RTF_REJECT)
2241 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002242 else if (rt->rt6i_flags&RTF_LOCAL)
2243 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2245 rtm->rtm_type = RTN_LOCAL;
2246 else
2247 rtm->rtm_type = RTN_UNICAST;
2248 rtm->rtm_flags = 0;
2249 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2250 rtm->rtm_protocol = rt->rt6i_protocol;
2251 if (rt->rt6i_flags&RTF_DYNAMIC)
2252 rtm->rtm_protocol = RTPROT_REDIRECT;
2253 else if (rt->rt6i_flags & RTF_ADDRCONF)
2254 rtm->rtm_protocol = RTPROT_KERNEL;
2255 else if (rt->rt6i_flags&RTF_DEFAULT)
2256 rtm->rtm_protocol = RTPROT_RA;
2257
2258 if (rt->rt6i_flags&RTF_CACHE)
2259 rtm->rtm_flags |= RTM_F_CLONED;
2260
2261 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002262 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002263 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002265 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266#ifdef CONFIG_IPV6_SUBTREES
2267 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002268 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002269 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002271 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002273 if (iif) {
2274#ifdef CONFIG_IPV6_MROUTE
2275 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002276 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002277 if (err <= 0) {
2278 if (!nowait) {
2279 if (err == 0)
2280 return 0;
2281 goto nla_put_failure;
2282 } else {
2283 if (err == -EMSGSIZE)
2284 goto nla_put_failure;
2285 }
2286 }
2287 } else
2288#endif
2289 NLA_PUT_U32(skb, RTA_IIF, iif);
2290 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002291 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002293 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002294 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002295 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002297
Changli Gaod8d1f302010-06-10 23:31:35 -07002298 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002299 goto nla_put_failure;
2300
Changli Gaod8d1f302010-06-10 23:31:35 -07002301 if (rt->dst.neighbour)
2302 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002303
Changli Gaod8d1f302010-06-10 23:31:35 -07002304 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002305 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2306
2307 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002308
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002309 if (!(rt->rt6i_flags & RTF_EXPIRES))
2310 expires = 0;
2311 else if (rt->rt6i_expires - jiffies < INT_MAX)
2312 expires = rt->rt6i_expires - jiffies;
2313 else
2314 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002315
Changli Gaod8d1f302010-06-10 23:31:35 -07002316 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2317 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002318 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319
Thomas Graf2d7202b2006-08-22 00:01:27 -07002320 return nlmsg_end(skb, nlh);
2321
2322nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002323 nlmsg_cancel(skb, nlh);
2324 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325}
2326
Patrick McHardy1b43af52006-08-10 23:11:17 -07002327int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328{
2329 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2330 int prefix;
2331
Thomas Graf2d7202b2006-08-22 00:01:27 -07002332 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2333 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2335 } else
2336 prefix = 0;
2337
Brian Haley191cd582008-08-14 15:33:21 -07002338 return rt6_fill_node(arg->net,
2339 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002341 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342}
2343
Thomas Grafc127ea22007-03-22 11:58:32 -07002344static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002346 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002347 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002349 struct sk_buff *skb;
2350 struct rtmsg *rtm;
2351 struct flowi fl;
2352 int err, iif = 0;
2353
2354 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2355 if (err < 0)
2356 goto errout;
2357
2358 err = -EINVAL;
2359 memset(&fl, 0, sizeof(fl));
2360
2361 if (tb[RTA_SRC]) {
2362 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2363 goto errout;
2364
2365 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2366 }
2367
2368 if (tb[RTA_DST]) {
2369 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2370 goto errout;
2371
2372 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2373 }
2374
2375 if (tb[RTA_IIF])
2376 iif = nla_get_u32(tb[RTA_IIF]);
2377
2378 if (tb[RTA_OIF])
2379 fl.oif = nla_get_u32(tb[RTA_OIF]);
2380
2381 if (iif) {
2382 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002383 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002384 if (!dev) {
2385 err = -ENODEV;
2386 goto errout;
2387 }
2388 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
2390 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002391 if (skb == NULL) {
2392 err = -ENOBUFS;
2393 goto errout;
2394 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395
2396 /* Reserve room for dummy headers, this skb can pass
2397 through good chunk of routing engine.
2398 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002399 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2401
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002402 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002403 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404
Brian Haley191cd582008-08-14 15:33:21 -07002405 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002407 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002409 kfree_skb(skb);
2410 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411 }
2412
Daniel Lezcano55786892008-03-04 13:47:47 -08002413 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002414errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416}
2417
Thomas Graf86872cb2006-08-22 00:01:08 -07002418void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419{
2420 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002421 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002422 u32 seq;
2423 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002425 err = -ENOBUFS;
2426 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002427
Thomas Graf339bf982006-11-10 14:10:15 -08002428 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002429 if (skb == NULL)
2430 goto errout;
2431
Brian Haley191cd582008-08-14 15:33:21 -07002432 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002433 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002434 if (err < 0) {
2435 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2436 WARN_ON(err == -EMSGSIZE);
2437 kfree_skb(skb);
2438 goto errout;
2439 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002440 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2441 info->nlh, gfp_any());
2442 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002443errout:
2444 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002445 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446}
2447
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002448static int ip6_route_dev_notify(struct notifier_block *this,
2449 unsigned long event, void *data)
2450{
2451 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002452 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002453
2454 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002455 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002456 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2457#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002458 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002459 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002460 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002461 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2462#endif
2463 }
2464
2465 return NOTIFY_OK;
2466}
2467
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468/*
2469 * /proc
2470 */
2471
2472#ifdef CONFIG_PROC_FS
2473
/*
 * Buffer bookkeeping record. NOTE(review): not referenced by any code
 * visible in this part of the file - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2482
2483static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2484{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002485 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002487 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488
2489#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002490 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002492 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493#endif
2494
2495 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002496 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002498 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002500 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002501 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2502 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002503 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 return 0;
2505}
2506
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002507static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002509 struct net *net = (struct net *)m->private;
2510 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002511 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512}
2513
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002514static int ipv6_route_open(struct inode *inode, struct file *file)
2515{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002516 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002517}
2518
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002519static const struct file_operations ipv6_route_proc_fops = {
2520 .owner = THIS_MODULE,
2521 .open = ipv6_route_open,
2522 .read = seq_read,
2523 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002524 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002525};
2526
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2528{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002529 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002531 net->ipv6.rt6_stats->fib_nodes,
2532 net->ipv6.rt6_stats->fib_route_nodes,
2533 net->ipv6.rt6_stats->fib_rt_alloc,
2534 net->ipv6.rt6_stats->fib_rt_entries,
2535 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002536 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002537 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538
2539 return 0;
2540}
2541
2542static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2543{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002544 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002545}
2546
Arjan van de Ven9a321442007-02-12 00:55:35 -08002547static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 .owner = THIS_MODULE,
2549 .open = rt6_stats_seq_open,
2550 .read = seq_read,
2551 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002552 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553};
2554#endif /* CONFIG_PROC_FS */
2555
2556#ifdef CONFIG_SYSCTL
2557
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002559int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 void __user *buffer, size_t *lenp, loff_t *ppos)
2561{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002562 struct net *net = current->nsproxy->net_ns;
2563 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002565 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002566 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567 return 0;
2568 } else
2569 return -EINVAL;
2570}
2571
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002572ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002573 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002575 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002577 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002578 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 },
2580 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002582 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 .maxlen = sizeof(int),
2584 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002585 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 },
2587 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002589 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 .maxlen = sizeof(int),
2591 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002592 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 },
2594 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002596 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 .maxlen = sizeof(int),
2598 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002599 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 },
2601 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002603 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 .maxlen = sizeof(int),
2605 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002606 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 },
2608 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002610 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 .maxlen = sizeof(int),
2612 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002613 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 },
2615 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 .maxlen = sizeof(int),
2619 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002620 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 },
2622 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002624 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 .maxlen = sizeof(int),
2626 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002627 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 },
2629 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002631 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 .maxlen = sizeof(int),
2633 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002634 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 },
2636 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002638 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 .maxlen = sizeof(int),
2640 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002641 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002643 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644};
2645
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002646struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002647{
2648 struct ctl_table *table;
2649
2650 table = kmemdup(ipv6_route_table_template,
2651 sizeof(ipv6_route_table_template),
2652 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002653
2654 if (table) {
2655 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002656 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002657 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2658 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2659 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2660 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2661 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2662 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2663 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002664 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002665 }
2666
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002667 return table;
2668}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669#endif
2670
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002671static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002672{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002673 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002674
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002675 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2676 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002677
Eric Dumazetfc66f952010-10-08 06:37:34 +00002678 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2679 goto out_ip6_dst_ops;
2680
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002681 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2682 sizeof(*net->ipv6.ip6_null_entry),
2683 GFP_KERNEL);
2684 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002685 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002686 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002687 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002688 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002689
2690#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2691 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2692 sizeof(*net->ipv6.ip6_prohibit_entry),
2693 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002694 if (!net->ipv6.ip6_prohibit_entry)
2695 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002696 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002697 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002698 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002699
2700 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2701 sizeof(*net->ipv6.ip6_blk_hole_entry),
2702 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002703 if (!net->ipv6.ip6_blk_hole_entry)
2704 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002705 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002706 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002707 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002708#endif
2709
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002710 net->ipv6.sysctl.flush_delay = 0;
2711 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2712 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2713 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2714 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2715 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2716 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2717 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2718
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002719#ifdef CONFIG_PROC_FS
2720 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2721 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2722#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002723 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2724
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002725 ret = 0;
2726out:
2727 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002728
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002729#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2730out_ip6_prohibit_entry:
2731 kfree(net->ipv6.ip6_prohibit_entry);
2732out_ip6_null_entry:
2733 kfree(net->ipv6.ip6_null_entry);
2734#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002735out_ip6_dst_entries:
2736 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002737out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002738 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002739}
2740
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002741static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002742{
2743#ifdef CONFIG_PROC_FS
2744 proc_net_remove(net, "ipv6_route");
2745 proc_net_remove(net, "rt6_stats");
2746#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002747 kfree(net->ipv6.ip6_null_entry);
2748#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2749 kfree(net->ipv6.ip6_prohibit_entry);
2750 kfree(net->ipv6.ip6_blk_hole_entry);
2751#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002752 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002753}
2754
2755static struct pernet_operations ip6_route_net_ops = {
2756 .init = ip6_route_net_init,
2757 .exit = ip6_route_net_exit,
2758};
2759
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002760static struct notifier_block ip6_route_dev_notifier = {
2761 .notifier_call = ip6_route_dev_notify,
2762 .priority = 0,
2763};
2764
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002765int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002767 int ret;
2768
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002769 ret = -ENOMEM;
2770 ip6_dst_ops_template.kmem_cachep =
2771 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2772 SLAB_HWCACHE_ALIGN, NULL);
2773 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002774 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002775
Eric Dumazetfc66f952010-10-08 06:37:34 +00002776 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002777 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002778 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002779
Eric Dumazetfc66f952010-10-08 06:37:34 +00002780 ret = register_pernet_subsys(&ip6_route_net_ops);
2781 if (ret)
2782 goto out_dst_entries;
2783
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002784 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2785
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002786 /* Registering of the loopback is done before this portion of code,
2787 * the loopback reference in rt6_info will not be taken, do it
2788 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002789 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002790 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2791 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002792 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002793 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002794 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002795 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2796 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002797 ret = fib6_init();
2798 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002799 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002800
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002801 ret = xfrm6_init();
2802 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002803 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002804
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002805 ret = fib6_rules_init();
2806 if (ret)
2807 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002808
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002809 ret = -ENOBUFS;
2810 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2811 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2812 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2813 goto fib6_rules_init;
2814
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002815 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002816 if (ret)
2817 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002818
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002819out:
2820 return ret;
2821
2822fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002823 fib6_rules_cleanup();
2824xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002825 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002826out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002827 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002828out_register_subsys:
2829 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002830out_dst_entries:
2831 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002832out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002833 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002834 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002835}
2836
2837void ip6_route_cleanup(void)
2838{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002839 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002840 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002841 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002843 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002844 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002845 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002846}