blob: 496b62712fe8c9c42f15e59d86cfd94bc936e59b [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
Eric Dumazet21efcfa2011-07-19 20:18:36 +000065static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070067static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080068static unsigned int ip6_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +000069static unsigned int ip6_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070070static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080074static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
90
David S. Miller06582542011-01-27 14:58:42 -080091static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
Yan, Zheng8e2ec632011-09-05 21:34:30 +000097 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
David S. Miller06582542011-01-27 14:58:42 -0800100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
David S. Miller39232972012-01-26 15:22:32 -0500124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
David S. Millera7563f32012-01-26 16:29:16 -0500128 if (!ipv6_addr_any(p))
David S. Miller39232972012-01-26 15:22:32 -0500129 return (const void *) p;
130 return daddr;
131}
132
David S. Millerd3aaeb32011-07-18 00:40:17 -0700133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
David S. Miller39232972012-01-26 15:22:32 -0500135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
David S. Miller8ade06c2011-12-29 18:51:57 -0500145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
David S. Millerf83c7792011-12-28 15:41:23 -0500146{
David S. Miller8ade06c2011-12-29 18:51:57 -0500147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
David S. Millerf83c7792011-12-28 15:41:23 -0500153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700156}
157
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800158static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800160 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800164 .default_advmss = ip6_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000165 .mtu = ip6_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800166 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700172 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700173 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174};
175
Steffen Klassertebb762f2011-11-23 02:12:51 +0000176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -0800177{
Steffen Klassert618f9bc2011-11-23 02:13:31 +0000178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -0800181}
182
David S. Miller14e50e52007-05-24 18:17:54 -0700183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
Held Bernhard0972ddb2011-04-24 22:07:32 +0000187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
David S. Miller14e50e52007-05-24 18:17:54 -0700193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800195 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000198 .mtu = ip6_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800199 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700202 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700203};
204
David S. Miller62fa8a82011-01-26 20:51:05 -0800205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800209static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700219 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
Thomas Graf101367c2006-08-04 03:39:02 -0700224#ifdef CONFIG_IPV6_MULTIPLE_TABLES
225
/* Packet handlers referenced by ip6_prohibit_entry_template. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700228
Adrian Bunk280a34c2008-04-21 02:29:32 -0700229static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700230 .dst = {
231 .__refcnt = ATOMIC_INIT(1),
232 .__use = 1,
233 .obsolete = -1,
234 .error = -EACCES,
Changli Gaod8d1f302010-06-10 23:31:35 -0700235 .input = ip6_pkt_prohibit,
236 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700237 },
238 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700239 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700240 .rt6i_metric = ~(u32) 0,
241 .rt6i_ref = ATOMIC_INIT(1),
242};
243
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800244static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700245 .dst = {
246 .__refcnt = ATOMIC_INIT(1),
247 .__use = 1,
248 .obsolete = -1,
249 .error = -EINVAL,
Changli Gaod8d1f302010-06-10 23:31:35 -0700250 .input = dst_discard,
251 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700252 },
253 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700254 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700255 .rt6i_metric = ~(u32) 0,
256 .rt6i_ref = ATOMIC_INIT(1),
257};
258
259#endif
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700263 struct net_device *dev,
264 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
David S. Miller957c6652011-06-24 15:25:00 -0700266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700267
David S. Miller38308472011-12-03 18:02:47 -0500268 if (rt)
Madalin Bucurfbe58182011-09-26 07:04:56 +0000269 memset(&rt->rt6i_table, 0,
David S. Miller38308472011-12-03 18:02:47 -0500270 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700271
272 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800279 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
David S. Miller38308472011-12-03 18:02:47 -0500284 if (idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900287 }
David S. Millerb3419362010-11-30 12:27:11 -0800288 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292}
293
David S. Miller6431cbc2011-02-07 20:38:06 -0800294static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296static u32 rt6_peer_genid(void)
297{
298 return atomic_read(&__rt6_peer_genid);
299}
300
David S. Millerb3419362010-11-30 12:27:11 -0800301void rt6_bind_peer(struct rt6_info *rt, int create)
302{
303 struct inet_peer *peer;
304
David S. Millerb3419362010-11-30 12:27:11 -0800305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310}
311
312static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314{
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800317 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900318 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
David S. Miller38308472011-12-03 18:02:47 -0500320 if (dev != loopback_dev && idev && idev->dev == dev) {
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
David S. Miller38308472011-12-03 18:02:47 -0500323 if (loopback_idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328}
329
330static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000332 return (rt->rt6i_flags & RTF_EXPIRES) &&
David S. Millerd1918542011-12-28 20:19:20 -0500333 time_after(jiffies, rt->dst.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000336static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700337{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700340}
341
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */
345
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800346static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000348 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700350 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
Changli Gaod8d1f302010-06-10 23:31:35 -0700358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -0500359 struct net_device *dev = sprt->dst.dev;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360
361 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
David S. Miller38308472011-12-03 18:02:47 -0500365 if (!sprt->rt6i_idev ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700367 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900369 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900380 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900382 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 if (local)
384 return local;
385
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700386 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800387 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900389out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 return rt;
391}
392
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards
 * the neighbour bound to @rt when that neighbour is not in a NUD_VALID
 * state and the last update is older than the interface's
 * rtr_probe_interval.  @rt may be NULL, in which case nothing happens.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing;
 * the lock is dropped before the solicitation is sent.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	write_lock_bh(&neigh->lock);
	/* Re-check the state now that the lock is held. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp first so later callers inside the interval skip probing. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700436static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437{
David S. Millerd1918542011-12-28 20:19:20 -0500438 struct net_device *dev = rt->dst.dev;
David S. Miller161980f2007-04-06 11:42:27 -0700439 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800440 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445}
446
Dave Jonesb6f99a22007-03-22 12:27:49 -0700447static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000449 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800450 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000451
452 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +0000453 neigh = dst_get_neighbour_noref(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700460 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800461#ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464#endif
465 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800466 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800467 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800468 } else
469 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000470 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800471 return m;
472}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800474static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
476{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700477 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900478
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700479 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800481 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800482#ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700485 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800487 return -1;
488 return m;
489}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
David S. Millerf11e6652007-03-24 20:36:25 -0700491static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800493{
David S. Millerf11e6652007-03-24 20:36:25 -0700494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512out:
513 return match;
514}
515
516static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519{
520 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800521 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
David S. Millerf11e6652007-03-24 20:36:25 -0700523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700525 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700528 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700529 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800530
David S. Millerf11e6652007-03-24 20:36:25 -0700531 return match;
532}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800533
David S. Millerf11e6652007-03-24 20:36:25 -0700534static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535{
536 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800537 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538
David S. Millerf11e6652007-03-24 20:36:25 -0700539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
David S. Millerf11e6652007-03-24 20:36:25 -0700543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800545 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700546 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700547 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700548
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800549 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 }
556
David S. Millerd1918542011-12-28 20:19:20 -0500557 net = dev_net(rt0->dst.dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000558 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559}
560
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * After validation the matching route is looked up with
 * rt6_get_route_info().  A zero lifetime deletes an existing route; a
 * non-zero lifetime adds the route when it is missing, otherwise the
 * existing route's preference bits are refreshed.  RTF_EXPIRES and
 * dst.expires are then set from the lifetime (cleared for an infinite
 * lifetime) and the reference on the route is dropped.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int min_length;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Compare as size_t only after ruling out a negative length. */
	if (len < 0 || (size_t) len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.  The option length is
	 * in units of 8 octets and includes the 8-octet header, so
	 * (RFC 4191, section 2.3):
	 *   prefix_len > 64  needs length 3 (16 prefix octets)
	 *   prefix_len > 0   needs length 2 or 3 (at least 8 prefix octets)
	 *   prefix_len == 0  allows length 1, 2 or 3
	 * Anything shorter would make the prefix copy below read past
	 * the end of the option.
	 */
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64)
		min_length = 3;
	else if (rinfo->prefix_len > 0)
		min_length = 2;
	else
		min_length = 1;
	if (rinfo->length < min_length || rinfo->length > 3)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copies only prefix_len bits, which the checks above bound */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->dst.expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
634
/*
 * BACKTRACK(__net, saddr) - climb the FIB tree when the lookup at the
 * current node produced the namespace's ip6_null_entry.
 *
 * This macro is not self-contained.  The expanding function must have
 * in scope:
 *   rt       - struct rt6_info *, the current lookup result
 *   fn       - struct fib6_node *, the current node (updated here)
 *   restart: - label to retry the lookup at the new fn
 *   out:     - label taken when the top-level root is reached
 *
 * Starting from fn, move to the parent; if the parent has a subtree
 * other than fn, look @saddr up in it instead.  Jump to restart at the
 * first node carrying RTN_RTINFO, or to out at an RTN_TL_ROOT node.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700652
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800653static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500655 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656{
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
Thomas Grafc71099a2006-08-04 23:20:06 -0700660 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700662restart:
663 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700666out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700667 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700668 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700669 return rt;
670
671}
672
Florian Westphalea6e5742011-09-05 16:05:44 +0200673struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675{
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677}
678EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900680struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700682{
David S. Miller4c9483b2011-03-12 16:22:43 -0500683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700686 };
687 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700689
Thomas Grafadaa70b2006-10-13 15:01:03 -0700690 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
David S. Miller4c9483b2011-03-12 16:22:43 -0500695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 return NULL;
702}
703
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900704EXPORT_SYMBOL(rt6_lookup);
705
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
711
Thomas Graf86872cb2006-08-22 00:01:08 -0700712static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713{
714 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700715 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
Thomas Grafc71099a2006-08-04 23:20:06 -0700717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700719 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700720 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
722 return err;
723}
724
Thomas Graf40e22e82006-08-22 00:00:45 -0700725int ip6_ins_rt(struct rt6_info *rt)
726{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800727 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -0500728 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800729 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800730 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700731}
732
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000733static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000735 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000743 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
745 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800746 int attempts = !in_softirq();
747
David S. Miller38308472011-12-03 18:02:47 -0500748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
David S. Millerbb3c3682011-12-13 17:35:06 -0500749 if (ort->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900751 rt->rt6i_flags |= RTF_ANYCAST;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000752 rt->rt6i_gateway = *daddr;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756
757#ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000759 rt->rt6i_src.addr = *saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 rt->rt6i_src.plen = 128;
761 }
762#endif
763
David S. Miller14deae42009-01-04 16:04:39 -0800764 retry:
David S. Miller8ade06c2011-12-29 18:51:57 -0500765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
David S. Millerd1918542011-12-28 20:19:20 -0500766 struct net *net = dev_net(rt->dst.dev);
David S. Miller14deae42009-01-04 16:04:39 -0800767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700787 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700788 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800789 return NULL;
790 }
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800793 return rt;
794}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000796static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800798{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800801 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800802 rt->rt6i_flags |= RTF_CACHE;
David Miller27217452011-12-02 16:52:08 +0000803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800804 }
805 return rt;
806}
807
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800808static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500809 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810{
811 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800812 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700813 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800815 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700816 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700818 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819
820relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700821 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800823restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500824 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700827 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800828
David S. Miller4c9483b2011-03-12 16:22:43 -0500829 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800830 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800831 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800832 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
Changli Gaod8d1f302010-06-10 23:31:35 -0700834 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700835 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800836
David Miller27217452011-12-02 16:52:08 +0000837 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500838 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800839 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500840 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800841 else
842 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800843
Changli Gaod8d1f302010-06-10 23:31:35 -0700844 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800845 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800846
Changli Gaod8d1f302010-06-10 23:31:35 -0700847 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800848 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700849 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800850 if (!err)
851 goto out2;
852 }
853
854 if (--attempts <= 0)
855 goto out2;
856
857 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700858 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800859 * released someone could insert this route. Relookup.
860 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700861 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800862 goto relookup;
863
864out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800865 if (reachable) {
866 reachable = 0;
867 goto restart_2;
868 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700869 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700870 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700872 rt->dst.lastuse = jiffies;
873 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700874
875 return rt;
876}
877
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800878static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500879 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700880{
David S. Miller4c9483b2011-03-12 16:22:43 -0500881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700882}
883
Thomas Grafc71099a2006-08-04 23:20:06 -0700884void ip6_route_input(struct sk_buff *skb)
885{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000886 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900887 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700888 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500889 struct flowi6 fl6 = {
890 .flowi6_iif = skb->dev->ifindex,
891 .daddr = iph->daddr,
892 .saddr = iph->saddr,
David S. Miller38308472011-12-03 18:02:47 -0500893 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
David S. Miller4c9483b2011-03-12 16:22:43 -0500894 .flowi6_mark = skb->mark,
895 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700896 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700897
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800898 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700899 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700900
David S. Miller4c9483b2011-03-12 16:22:43 -0500901 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700902}
903
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800904static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500905 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700906{
David S. Miller4c9483b2011-03-12 16:22:43 -0500907 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700908}
909
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700910struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500911 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700912{
913 int flags = 0;
914
David S. Miller4c9483b2011-03-12 16:22:43 -0500915 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700916 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700917
David S. Miller4c9483b2011-03-12 16:22:43 -0500918 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700919 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000920 else if (sk)
921 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700922
David S. Miller4c9483b2011-03-12 16:22:43 -0500923 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924}
925
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900926EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927
David S. Miller2774c132011-03-01 14:59:04 -0800928struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700929{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700930 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700931 struct dst_entry *new = NULL;
932
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700933 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700934 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700935 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
936
Changli Gaod8d1f302010-06-10 23:31:35 -0700937 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700938
David S. Miller14e50e52007-05-24 18:17:54 -0700939 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800940 new->input = dst_discard;
941 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700942
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000943 if (dst_metrics_read_only(&ort->dst))
944 new->_metrics = ort->dst._metrics;
945 else
946 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700947 rt->rt6i_idev = ort->rt6i_idev;
948 if (rt->rt6i_idev)
949 in6_dev_hold(rt->rt6i_idev);
David S. Millerd1918542011-12-28 20:19:20 -0500950 rt->dst.expires = 0;
David S. Miller14e50e52007-05-24 18:17:54 -0700951
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000952 rt->rt6i_gateway = ort->rt6i_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -0700953 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
954 rt->rt6i_metric = 0;
955
956 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
957#ifdef CONFIG_IPV6_SUBTREES
958 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
959#endif
960
961 dst_free(new);
962 }
963
David S. Miller69ead7a2011-03-01 14:45:33 -0800964 dst_release(dst_orig);
965 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700966}
David S. Miller14e50e52007-05-24 18:17:54 -0700967
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968/*
969 * Destination cache support functions
970 */
971
972static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
973{
974 struct rt6_info *rt;
975
976 rt = (struct rt6_info *) dst;
977
David S. Miller6431cbc2011-02-07 20:38:06 -0800978 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
979 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
980 if (!rt->rt6i_peer)
981 rt6_bind_peer(rt, 0);
982 rt->rt6i_peer_genid = rt6_peer_genid();
983 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800985 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 return NULL;
987}
988
989static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
990{
991 struct rt6_info *rt = (struct rt6_info *) dst;
992
993 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000994 if (rt->rt6i_flags & RTF_CACHE) {
995 if (rt6_check_expired(rt)) {
996 ip6_del_rt(rt);
997 dst = NULL;
998 }
999 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001001 dst = NULL;
1002 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001004 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005}
1006
1007static void ip6_link_failure(struct sk_buff *skb)
1008{
1009 struct rt6_info *rt;
1010
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001011 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
Eric Dumazetadf30902009-06-02 05:19:30 +00001013 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 if (rt) {
David S. Miller38308472011-12-03 18:02:47 -05001015 if (rt->rt6i_flags & RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001016 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 rt->rt6i_flags |= RTF_EXPIRES;
1018 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1019 rt->rt6i_node->fn_sernum = -1;
1020 }
1021}
1022
1023static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1024{
1025 struct rt6_info *rt6 = (struct rt6_info*)dst;
1026
1027 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1028 rt6->rt6i_flags |= RTF_MODIFIED;
1029 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001030 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001032 features |= RTAX_FEATURE_ALLFRAG;
1033 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 }
David S. Millerdefb3512010-12-08 21:16:57 -08001035 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 }
1037}
1038
David S. Miller0dbaee32010-12-13 12:52:14 -08001039static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040{
David S. Miller0dbaee32010-12-13 12:52:14 -08001041 struct net_device *dev = dst->dev;
1042 unsigned int mtu = dst_mtu(dst);
1043 struct net *net = dev_net(dev);
1044
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1046
Daniel Lezcano55786892008-03-04 13:47:47 -08001047 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1048 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049
1050 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001051 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1052 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1053 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 * rely only on pmtu discovery"
1055 */
1056 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1057 mtu = IPV6_MAXPLEN;
1058 return mtu;
1059}
1060
Steffen Klassertebb762f2011-11-23 02:12:51 +00001061static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001062{
David S. Millerd33e4552010-12-14 13:01:14 -08001063 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001064 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1065
1066 if (mtu)
1067 return mtu;
1068
1069 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08001070
1071 rcu_read_lock();
1072 idev = __in6_dev_get(dst->dev);
1073 if (idev)
1074 mtu = idev->cnf.mtu6;
1075 rcu_read_unlock();
1076
1077 return mtu;
1078}
1079
/* Taken (BH-disabling) around every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
/* Entries created by icmp6_dst_alloc(), chained through dst->next. */
static struct dst_entry *icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001082
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001083struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 struct neighbour *neigh,
David S. Miller87a11572011-12-06 17:04:13 -05001085 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086{
David S. Miller87a11572011-12-06 17:04:13 -05001087 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 struct rt6_info *rt;
1089 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001090 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091
David S. Miller38308472011-12-03 18:02:47 -05001092 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00001093 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
David S. Miller957c6652011-06-24 15:25:00 -07001095 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05001096 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05001098 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 goto out;
1100 }
1101
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 if (neigh)
1103 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001104 else {
David S. Millerf83c7792011-12-28 15:41:23 -05001105 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
David S. Millerb43faac2011-12-13 16:48:21 -05001106 if (IS_ERR(neigh)) {
RongQing.Li252c3d82012-01-12 22:33:46 +00001107 in6_dev_put(idev);
David S. Millerb43faac2011-12-13 16:48:21 -05001108 dst_free(&rt->dst);
1109 return ERR_CAST(neigh);
1110 }
David S. Miller14deae42009-01-04 16:04:39 -08001111 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001113 rt->dst.flags |= DST_HOST;
1114 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001115 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001116 atomic_set(&rt->dst.__refcnt, 1);
David S. Miller87a11572011-12-06 17:04:13 -05001117 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001118 rt->rt6i_dst.plen = 128;
1119 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001120 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001122 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001123 rt->dst.next = icmp6_dst_gc_list;
1124 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001125 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126
Daniel Lezcano55786892008-03-04 13:47:47 -08001127 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128
David S. Miller87a11572011-12-06 17:04:13 -05001129 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1130
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131out:
David S. Miller87a11572011-12-06 17:04:13 -05001132 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133}
1134
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001135int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001137 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001138 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001140 spin_lock_bh(&icmp6_dst_lock);
1141 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001142
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 while ((dst = *pprev) != NULL) {
1144 if (!atomic_read(&dst->__refcnt)) {
1145 *pprev = dst->next;
1146 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 } else {
1148 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001149 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 }
1151 }
1152
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001153 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001154
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001155 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156}
1157
David S. Miller1e493d12008-09-10 17:27:15 -07001158static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1159 void *arg)
1160{
1161 struct dst_entry *dst, **pprev;
1162
1163 spin_lock_bh(&icmp6_dst_lock);
1164 pprev = &icmp6_dst_gc_list;
1165 while ((dst = *pprev) != NULL) {
1166 struct rt6_info *rt = (struct rt6_info *) dst;
1167 if (func(rt, arg)) {
1168 *pprev = dst->next;
1169 dst_free(dst);
1170 } else {
1171 pprev = &dst->next;
1172 }
1173 }
1174 spin_unlock_bh(&icmp6_dst_lock);
1175}
1176
Daniel Lezcano569d3642008-01-18 03:56:57 -08001177static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001180 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001181 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1182 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1183 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1184 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1185 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001186 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
Eric Dumazetfc66f952010-10-08 06:37:34 +00001188 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001189 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001190 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 goto out;
1192
Benjamin Thery6891a342008-03-04 13:49:47 -08001193 net->ipv6.ip6_rt_gc_expire++;
1194 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1195 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001196 entries = dst_entries_get_slow(ops);
1197 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001198 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001200 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001201 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202}
1203
1204/* Clean host part of a prefix. Not necessary in radix tree,
1205 but results in cleaner routing tables.
1206
1207 Remove it only when all the things will work!
1208 */
1209
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001210int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211{
David S. Miller5170ae82010-12-12 21:35:57 -08001212 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001213 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001214 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001215 struct inet6_dev *idev;
1216
1217 rcu_read_lock();
1218 idev = __in6_dev_get(dev);
1219 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001220 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001221 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001222 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001223 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 }
1225 return hoplimit;
1226}
David S. Millerabbf46a2010-12-12 21:14:46 -08001227EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
1229/*
1230 *
1231 */
1232
Thomas Graf86872cb2006-08-22 00:01:08 -07001233int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234{
1235 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001236 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 struct rt6_info *rt = NULL;
1238 struct net_device *dev = NULL;
1239 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001240 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 int addr_type;
1242
Thomas Graf86872cb2006-08-22 00:01:08 -07001243 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 return -EINVAL;
1245#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001246 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return -EINVAL;
1248#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001249 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001251 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 if (!dev)
1253 goto out;
1254 idev = in6_dev_get(dev);
1255 if (!idev)
1256 goto out;
1257 }
1258
Thomas Graf86872cb2006-08-22 00:01:08 -07001259 if (cfg->fc_metric == 0)
1260 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
Matti Vaittinend71314b2011-11-14 00:14:49 +00001262 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05001263 if (cfg->fc_nlinfo.nlh &&
1264 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001265 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001266 if (!table) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001267 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1268 table = fib6_new_table(net, cfg->fc_table);
1269 }
1270 } else {
1271 table = fib6_new_table(net, cfg->fc_table);
1272 }
David S. Miller38308472011-12-03 18:02:47 -05001273
1274 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001275 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07001276
David S. Miller957c6652011-06-24 15:25:00 -07001277 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
David S. Miller38308472011-12-03 18:02:47 -05001279 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 err = -ENOMEM;
1281 goto out;
1282 }
1283
Changli Gaod8d1f302010-06-10 23:31:35 -07001284 rt->dst.obsolete = -1;
David S. Millerd1918542011-12-28 20:19:20 -05001285 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001286 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1287 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 if (cfg->fc_protocol == RTPROT_UNSPEC)
1290 cfg->fc_protocol = RTPROT_BOOT;
1291 rt->rt6i_protocol = cfg->fc_protocol;
1292
1293 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294
1295 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001296 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001297 else if (cfg->fc_flags & RTF_LOCAL)
1298 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001300 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301
Changli Gaod8d1f302010-06-10 23:31:35 -07001302 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303
Thomas Graf86872cb2006-08-22 00:01:08 -07001304 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1305 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001307 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001309 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1310 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1311 if (!metrics) {
1312 err = -ENOMEM;
1313 goto out;
1314 }
1315 dst_init_metrics(&rt->dst, metrics, 0);
1316 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001318 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1319 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320#endif
1321
Thomas Graf86872cb2006-08-22 00:01:08 -07001322 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323
1324 /* We cannot add true routes via loopback here,
1325 they would result in kernel looping; promote them to reject routes
1326 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001327 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05001328 (dev && (dev->flags & IFF_LOOPBACK) &&
1329 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1330 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001332 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 if (dev) {
1334 dev_put(dev);
1335 in6_dev_put(idev);
1336 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001337 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 dev_hold(dev);
1339 idev = in6_dev_get(dev);
1340 if (!idev) {
1341 err = -ENODEV;
1342 goto out;
1343 }
1344 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001345 rt->dst.output = ip6_pkt_discard_out;
1346 rt->dst.input = ip6_pkt_discard;
1347 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1349 goto install_route;
1350 }
1351
Thomas Graf86872cb2006-08-22 00:01:08 -07001352 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001353 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 int gwa_type;
1355
Thomas Graf86872cb2006-08-22 00:01:08 -07001356 gw_addr = &cfg->fc_gateway;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001357 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 gwa_type = ipv6_addr_type(gw_addr);
1359
1360 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1361 struct rt6_info *grt;
1362
1363 /* IPv6 strictly inhibits using not link-local
1364 addresses as nexthop address.
1365 Otherwise, router will not able to send redirects.
1366 It is very good, but in some (rare!) circumstances
1367 (SIT, PtP, NBMA NOARP links) it is handy to allow
1368 some exceptions. --ANK
1369 */
1370 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001371 if (!(gwa_type & IPV6_ADDR_UNICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 goto out;
1373
Daniel Lezcano55786892008-03-04 13:47:47 -08001374 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
1376 err = -EHOSTUNREACH;
David S. Miller38308472011-12-03 18:02:47 -05001377 if (!grt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 goto out;
1379 if (dev) {
David S. Millerd1918542011-12-28 20:19:20 -05001380 if (dev != grt->dst.dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001381 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 goto out;
1383 }
1384 } else {
David S. Millerd1918542011-12-28 20:19:20 -05001385 dev = grt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 idev = grt->rt6i_idev;
1387 dev_hold(dev);
1388 in6_dev_hold(grt->rt6i_idev);
1389 }
David S. Miller38308472011-12-03 18:02:47 -05001390 if (!(grt->rt6i_flags & RTF_GATEWAY))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001392 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 if (err)
1395 goto out;
1396 }
1397 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001398 if (!dev || (dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 goto out;
1400 }
1401
1402 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05001403 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 goto out;
1405
Daniel Walterc3968a82011-04-13 21:10:57 +00001406 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1407 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1408 err = -EINVAL;
1409 goto out;
1410 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001411 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00001412 rt->rt6i_prefsrc.plen = 128;
1413 } else
1414 rt->rt6i_prefsrc.plen = 0;
1415
Thomas Graf86872cb2006-08-22 00:01:08 -07001416 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller8ade06c2011-12-29 18:51:57 -05001417 err = rt6_bind_neighbour(rt, dev);
David S. Millerf83c7792011-12-28 15:41:23 -05001418 if (err)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 }
1421
Thomas Graf86872cb2006-08-22 00:01:08 -07001422 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423
1424install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001425 if (cfg->fc_mx) {
1426 struct nlattr *nla;
1427 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
Thomas Graf86872cb2006-08-22 00:01:08 -07001429 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001430 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001431
1432 if (type) {
1433 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434 err = -EINVAL;
1435 goto out;
1436 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001437
David S. Millerdefb3512010-12-08 21:16:57 -08001438 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 }
1441 }
1442
Changli Gaod8d1f302010-06-10 23:31:35 -07001443 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001445 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001446
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001447 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001448
Thomas Graf86872cb2006-08-22 00:01:08 -07001449 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450
1451out:
1452 if (dev)
1453 dev_put(dev);
1454 if (idev)
1455 in6_dev_put(idev);
1456 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001457 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 return err;
1459}
1460
Thomas Graf86872cb2006-08-22 00:01:08 -07001461static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462{
1463 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001464 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05001465 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001467 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001468 return -ENOENT;
1469
Thomas Grafc71099a2006-08-04 23:20:06 -07001470 table = rt->rt6i_table;
1471 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
Thomas Graf86872cb2006-08-22 00:01:08 -07001473 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001474 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475
Thomas Grafc71099a2006-08-04 23:20:06 -07001476 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477
1478 return err;
1479}
1480
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001481int ip6_del_rt(struct rt6_info *rt)
1482{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001483 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05001484 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001485 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001486 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001487}
1488
Thomas Graf86872cb2006-08-22 00:01:08 -07001489static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490{
Thomas Grafc71099a2006-08-04 23:20:06 -07001491 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 struct fib6_node *fn;
1493 struct rt6_info *rt;
1494 int err = -ESRCH;
1495
Daniel Lezcano55786892008-03-04 13:47:47 -08001496 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001497 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001498 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
Thomas Grafc71099a2006-08-04 23:20:06 -07001500 read_lock_bh(&table->tb6_lock);
1501
1502 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001503 &cfg->fc_dst, cfg->fc_dst_len,
1504 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001505
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001507 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001508 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05001509 (!rt->dst.dev ||
1510 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001512 if (cfg->fc_flags & RTF_GATEWAY &&
1513 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001515 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001517 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001518 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
Thomas Graf86872cb2006-08-22 00:01:08 -07001520 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 }
1522 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001523 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524
1525 return err;
1526}
1527
1528/*
1529 * Handle redirects
1530 */
/*
 * Flow key used while validating a redirect: an ordinary flowi6 extended
 * with the address of the router that sent the redirect.
 */
struct ip6rd_flowi {
	/* Must stay the first member: __ip6_route_redirect() casts the
	 * struct flowi6 pointer it receives back to struct ip6rd_flowi.
	 */
	struct flowi6 fl6;
	/* Compared against rt6i_gateway of each candidate route. */
	struct in6_addr gateway;
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001536static struct rt6_info *__ip6_route_redirect(struct net *net,
1537 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001538 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001539 int flags)
1540{
David S. Miller4c9483b2011-03-12 16:22:43 -05001541 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001542 struct rt6_info *rt;
1543 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001544
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001546 * Get the "current" route for this destination and
1547 * check if the redirect has come from approriate router.
1548 *
1549 * RFC 2461 specifies that redirects should only be
1550 * accepted if they come from the nexthop to the target.
1551 * Due to the way the routes are chosen, this notion
1552 * is a bit fuzzy and one might need to check all possible
1553 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555
Thomas Grafc71099a2006-08-04 23:20:06 -07001556 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001557 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001558restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001559 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001560 /*
1561 * Current route is on-link; redirect is always invalid.
1562 *
1563 * Seems, previous statement is not true. It could
1564 * be node, which looks for us as on-link (f.e. proxy ndisc)
1565 * But then router serving it might decide, that we should
1566 * know truth 8)8) --ANK (980726).
1567 */
1568 if (rt6_check_expired(rt))
1569 continue;
1570 if (!(rt->rt6i_flags & RTF_GATEWAY))
1571 continue;
David S. Millerd1918542011-12-28 20:19:20 -05001572 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001573 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001574 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001575 continue;
1576 break;
1577 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001578
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001579 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001580 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001581 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001582out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001583 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001584
1585 read_unlock_bh(&table->tb6_lock);
1586
1587 return rt;
1588};
1589
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001590static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1591 const struct in6_addr *src,
1592 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001593 struct net_device *dev)
1594{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001595 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001596 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001597 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001598 .fl6 = {
1599 .flowi6_oif = dev->ifindex,
1600 .daddr = *dest,
1601 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001602 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001603 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001604
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001605 rdfl.gateway = *gateway;
Brian Haley86c36ce2009-10-07 13:58:01 -07001606
Thomas Grafadaa70b2006-10-13 15:01:03 -07001607 if (rt6_need_strict(dest))
1608 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001609
David S. Miller4c9483b2011-03-12 16:22:43 -05001610 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001611 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001612}
1613
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001614void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1615 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001616 struct neighbour *neigh, u8 *lladdr, int on_link)
1617{
1618 struct rt6_info *rt, *nrt = NULL;
1619 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001620 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001621
1622 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1623
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001624 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 if (net_ratelimit())
1626 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1627 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001628 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 }
1630
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 /*
1632 * We have finally decided to accept it.
1633 */
1634
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001635 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1637 NEIGH_UPDATE_F_OVERRIDE|
1638 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1639 NEIGH_UPDATE_F_ISROUTER))
1640 );
1641
1642 /*
1643 * Redirect received -> path was valid.
1644 * Look, redirects are sent only in response to data packets,
1645 * so that this nexthop apparently is reachable. --ANK
1646 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001647 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648
1649 /* Duplicate redirect: silently ignore. */
David Miller27217452011-12-02 16:52:08 +00001650 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 goto out;
1652
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001653 nrt = ip6_rt_copy(rt, dest);
David S. Miller38308472011-12-03 18:02:47 -05001654 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 goto out;
1656
1657 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1658 if (on_link)
1659 nrt->rt6i_flags &= ~RTF_GATEWAY;
1660
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001661 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
David S. Miller69cce1d2011-07-17 23:09:49 -07001662 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663
Thomas Graf40e22e82006-08-22 00:00:45 -07001664 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 goto out;
1666
Changli Gaod8d1f302010-06-10 23:31:35 -07001667 netevent.old = &rt->dst;
1668 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001669 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1670
David S. Miller38308472011-12-03 18:02:47 -05001671 if (rt->rt6i_flags & RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001672 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 return;
1674 }
1675
1676out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001677 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678}
1679
1680/*
1681 * Handle ICMP "packet too big" messages
1682 * i.e. Path MTU discovery
1683 */
1684
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001685static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001686 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687{
1688 struct rt6_info *rt, *nrt;
1689 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001690again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001691 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
David S. Miller38308472011-12-03 18:02:47 -05001692 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 return;
1694
Andrey Vagind3052b52010-12-11 15:20:11 +00001695 if (rt6_check_expired(rt)) {
1696 ip6_del_rt(rt);
1697 goto again;
1698 }
1699
Changli Gaod8d1f302010-06-10 23:31:35 -07001700 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 goto out;
1702
1703 if (pmtu < IPV6_MIN_MTU) {
1704 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001705 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 * MTU (1280) and a fragment header should always be included
1707 * after a node receiving Too Big message reporting PMTU is
1708 * less than the IPv6 Minimum Link MTU.
1709 */
1710 pmtu = IPV6_MIN_MTU;
1711 allfrag = 1;
1712 }
1713
1714 /* New mtu received -> path was valid.
1715 They are sent only in response to data packets,
1716 so that this nexthop apparently is reachable. --ANK
1717 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001718 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719
1720 /* Host route. If it is static, it would be better
1721 not to override it, but add new one, so that
1722 when cache entry will expire old pmtu
1723 would return automatically.
1724 */
1725 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001726 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1727 if (allfrag) {
1728 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1729 features |= RTAX_FEATURE_ALLFRAG;
1730 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1731 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001732 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1734 goto out;
1735 }
1736
1737 /* Network route.
1738 Two cases are possible:
1739 1. It is connected route. Action: COW
1740 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1741 */
David Miller27217452011-12-02 16:52:08 +00001742 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001743 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001744 else
1745 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001746
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001747 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001748 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1749 if (allfrag) {
1750 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1751 features |= RTAX_FEATURE_ALLFRAG;
1752 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1753 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001754
1755 /* According to RFC 1981, detecting PMTU increase shouldn't be
1756 * happened within 5 mins, the recommended timer is 10 mins.
1757 * Here this route expiration time is set to ip6_rt_mtu_expires
1758 * which is 10 mins. After 10 mins the decreased pmtu is expired
1759 * and detecting PMTU increase will be automatically happened.
1760 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001761 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001762 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1763
Thomas Graf40e22e82006-08-22 00:00:45 -07001764 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001767 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768}
1769
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001770void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001771 struct net_device *dev, u32 pmtu)
1772{
1773 struct net *net = dev_net(dev);
1774
1775 /*
1776 * RFC 1981 states that a node "MUST reduce the size of the packets it
1777 * is sending along the path" that caused the Packet Too Big message.
1778 * Since it's not possible in the general case to determine which
1779 * interface was used to send the original packet, we update the MTU
1780 * on the interface that will be used to send future packets. We also
1781 * update the MTU on the interface that received the Packet Too Big in
1782 * case the original packet was forced out that interface with
1783 * SO_BINDTODEVICE or similar. This is the next best thing to the
1784 * correct behaviour, which would be to update the MTU on all
1785 * interfaces.
1786 */
1787 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1788 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1789}
1790
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791/*
1792 * Misc support functions
1793 */
1794
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001795static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1796 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797{
David S. Millerd1918542011-12-28 20:19:20 -05001798 struct net *net = dev_net(ort->dst.dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001799 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001800 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
1802 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001803 rt->dst.input = ort->dst.input;
1804 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001805 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001807 rt->rt6i_dst.addr = *dest;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001808 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001809 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001810 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 rt->rt6i_idev = ort->rt6i_idev;
1812 if (rt->rt6i_idev)
1813 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001814 rt->dst.lastuse = jiffies;
David S. Millerd1918542011-12-28 20:19:20 -05001815 rt->dst.expires = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001817 rt->rt6i_gateway = ort->rt6i_gateway;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1819 rt->rt6i_metric = 0;
1820
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821#ifdef CONFIG_IPV6_SUBTREES
1822 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001824 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001825 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 }
1827 return rt;
1828}
1829
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001830#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001831static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001832 const struct in6_addr *prefix, int prefixlen,
1833 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001834{
1835 struct fib6_node *fn;
1836 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001837 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001838
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001839 table = fib6_get_table(net, RT6_TABLE_INFO);
David S. Miller38308472011-12-03 18:02:47 -05001840 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001841 return NULL;
1842
1843 write_lock_bh(&table->tb6_lock);
1844 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001845 if (!fn)
1846 goto out;
1847
Changli Gaod8d1f302010-06-10 23:31:35 -07001848 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001849 if (rt->dst.dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001850 continue;
1851 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 continue;
1853 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001855 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001856 break;
1857 }
1858out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001860 return rt;
1861}
1862
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001863static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001864 const struct in6_addr *prefix, int prefixlen,
1865 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001866 unsigned pref)
1867{
Thomas Graf86872cb2006-08-22 00:01:08 -07001868 struct fib6_config cfg = {
1869 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001870 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001871 .fc_ifindex = ifindex,
1872 .fc_dst_len = prefixlen,
1873 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001875 .fc_nlinfo.pid = 0,
1876 .fc_nlinfo.nlh = NULL,
1877 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001878 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001879
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001880 cfg.fc_dst = *prefix;
1881 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07001882
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001883 /* We should treat it as a default route if prefix length is 0. */
1884 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001885 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001886
Thomas Graf86872cb2006-08-22 00:01:08 -07001887 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001888
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001889 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001890}
1891#endif
1892
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001893struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001894{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001896 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001898 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001899 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001900 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901
Thomas Grafc71099a2006-08-04 23:20:06 -07001902 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001903 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001904 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001905 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 break;
1908 }
1909 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001910 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001911 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 return rt;
1913}
1914
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001915struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001916 struct net_device *dev,
1917 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918{
Thomas Graf86872cb2006-08-22 00:01:08 -07001919 struct fib6_config cfg = {
1920 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001921 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001922 .fc_ifindex = dev->ifindex,
1923 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001925 .fc_nlinfo.pid = 0,
1926 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001927 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001928 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001930 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931
Thomas Graf86872cb2006-08-22 00:01:08 -07001932 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return rt6_get_dflt_router(gwaddr, dev);
1935}
1936
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001937void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938{
1939 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001940 struct fib6_table *table;
1941
1942 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001943 table = fib6_get_table(net, RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001944 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001945 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946
1947restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001948 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001949 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001951 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001952 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001953 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 goto restart;
1955 }
1956 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001957 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958}
1959
Daniel Lezcano55786892008-03-04 13:47:47 -08001960static void rtmsg_to_fib6_config(struct net *net,
1961 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001962 struct fib6_config *cfg)
1963{
1964 memset(cfg, 0, sizeof(*cfg));
1965
1966 cfg->fc_table = RT6_TABLE_MAIN;
1967 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 cfg->fc_metric = rtmsg->rtmsg_metric;
1969 cfg->fc_expires = rtmsg->rtmsg_info;
1970 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 cfg->fc_flags = rtmsg->rtmsg_flags;
1973
Daniel Lezcano55786892008-03-04 13:47:47 -08001974 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001975
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001976 cfg->fc_dst = rtmsg->rtmsg_dst;
1977 cfg->fc_src = rtmsg->rtmsg_src;
1978 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07001979}
1980
Daniel Lezcano55786892008-03-04 13:47:47 -08001981int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982{
Thomas Graf86872cb2006-08-22 00:01:08 -07001983 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984 struct in6_rtmsg rtmsg;
1985 int err;
1986
1987 switch(cmd) {
1988 case SIOCADDRT: /* Add a route */
1989 case SIOCDELRT: /* Delete a route */
1990 if (!capable(CAP_NET_ADMIN))
1991 return -EPERM;
1992 err = copy_from_user(&rtmsg, arg,
1993 sizeof(struct in6_rtmsg));
1994 if (err)
1995 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001996
Daniel Lezcano55786892008-03-04 13:47:47 -08001997 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001998
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 rtnl_lock();
2000 switch (cmd) {
2001 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002002 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 break;
2004 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 break;
2007 default:
2008 err = -EINVAL;
2009 }
2010 rtnl_unlock();
2011
2012 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07002013 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
2015 return -EINVAL;
2016}
2017
2018/*
2019 * Drop the packet on the floor
2020 */
2021
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07002022static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002024 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00002025 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002026 switch (ipstats_mib_noroutes) {
2027 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07002028 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00002029 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002030 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002032 break;
2033 }
2034 /* FALLTHROUGH */
2035 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002036 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002038 break;
2039 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002040 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 kfree_skb(skb);
2042 return 0;
2043}
2044
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002045static int ip6_pkt_discard(struct sk_buff *skb)
2046{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002047 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002048}
2049
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002050static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051{
Eric Dumazetadf30902009-06-02 05:19:30 +00002052 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002053 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054}
2055
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Drop @skb with ICMPV6_ADM_PROHIBITED, accounted as
 * IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	const u8 code = ICMPV6_ADM_PROHIBITED;

	return ip6_pkt_drop(skb, code, IPSTATS_MIB_INNOROUTES);
}

/*
 * Drop @skb with ICMPV6_ADM_PROHIBITED, accounted as
 * IPSTATS_MIB_OUTNOROUTES. skb->dev is first pointed at the device of
 * the attached dst entry.
 */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	skb->dev = dst->dev;

	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif
2070
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071/*
2072 * Allocate a dst for local (unicast / anycast) address.
2073 */
2074
2075struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05002077 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002079 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002080 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002081 net->loopback_dev, 0);
David S. Millerf83c7792011-12-28 15:41:23 -05002082 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
David S. Miller38308472011-12-03 18:02:47 -05002084 if (!rt) {
Ben Greear40385652010-11-08 12:33:48 +00002085 if (net_ratelimit())
2086 pr_warning("IPv6: Maximum number of routes reached,"
2087 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002089 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 in6_dev_hold(idev);
2092
David S. Miller11d53b42011-06-24 15:23:34 -07002093 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002094 rt->dst.input = ip6_input;
2095 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002097 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098
2099 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002100 if (anycast)
2101 rt->rt6i_flags |= RTF_ANYCAST;
2102 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller8ade06c2011-12-29 18:51:57 -05002104 err = rt6_bind_neighbour(rt, rt->dst.dev);
David S. Millerf83c7792011-12-28 15:41:23 -05002105 if (err) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002106 dst_free(&rt->dst);
David S. Millerf83c7792011-12-28 15:41:23 -05002107 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 }
2109
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002110 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002112 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113
Changli Gaod8d1f302010-06-10 23:31:35 -07002114 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115
2116 return rt;
2117}
2118
Daniel Walterc3968a82011-04-13 21:10:57 +00002119int ip6_route_get_saddr(struct net *net,
2120 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002121 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002122 unsigned int prefs,
2123 struct in6_addr *saddr)
2124{
2125 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2126 int err = 0;
2127 if (rt->rt6i_prefsrc.plen)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002128 *saddr = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002129 else
2130 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2131 daddr, prefs, saddr);
2132 return err;
2133}
2134
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() for each route.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2141
2142static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2143{
2144 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2145 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2146 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2147
David S. Millerd1918542011-12-28 20:19:20 -05002148 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00002149 rt != net->ipv6.ip6_null_entry &&
2150 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2151 /* remove prefsrc entry */
2152 rt->rt6i_prefsrc.plen = 0;
2153 }
2154 return 0;
2155}
2156
2157void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2158{
2159 struct net *net = dev_net(ifp->idev->dev);
2160 struct arg_dev_net_ip adni = {
2161 .dev = ifp->idev->dev,
2162 .net = net,
2163 .addr = &ifp->addr,
2164 };
2165 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2166}
2167
/* Argument bundle handed to fib6_ifdown() for each route. */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2172
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173static int fib6_ifdown(struct rt6_info *rt, void *arg)
2174{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002175 const struct arg_dev_net *adn = arg;
2176 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002177
David S. Millerd1918542011-12-28 20:19:20 -05002178 if ((rt->dst.dev == dev || !dev) &&
David S. Millerc159d302011-12-26 15:24:36 -05002179 rt != adn->net->ipv6.ip6_null_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 return -1;
David S. Millerc159d302011-12-26 15:24:36 -05002181
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 return 0;
2183}
2184
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002185void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002187 struct arg_dev_net adn = {
2188 .dev = dev,
2189 .net = net,
2190 };
2191
2192 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002193 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194}
2195
/* Argument bundle handed to rt6_mtu_change_route() for each route. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2201
2202static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2203{
2204 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2205 struct inet6_dev *idev;
2206
2207 /* In IPv6 pmtu discovery is not optional,
2208 so that RTAX_MTU lock cannot disable it.
2209 We still use this lock to block changes
2210 caused by addrconf/ndisc.
2211 */
2212
2213 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05002214 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 return 0;
2216
2217 /* For administrative MTU increase, there is no way to discover
2218 IPv6 PMTU increase, so PMTU increase should be updated here.
2219 Since RFC 1981 doesn't include administrative MTU increase
2220 update PMTU increase is a MUST. (i.e. jumbo frame)
2221 */
2222 /*
2223 If new MTU is less than route PMTU, this new MTU will be the
2224 lowest MTU in the path, update the route PMTU to reflect PMTU
2225 decreases; if new MTU is greater than route PMTU, and the
2226 old MTU is the lowest MTU in the path, update the route PMTU
2227 to reflect the increase. In this case if the other nodes' MTU
2228 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2229 PMTU discouvery.
2230 */
David S. Millerd1918542011-12-28 20:19:20 -05002231 if (rt->dst.dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002232 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2233 (dst_mtu(&rt->dst) >= arg->mtu ||
2234 (dst_mtu(&rt->dst) < arg->mtu &&
2235 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002236 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002237 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 return 0;
2239}
2240
2241void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2242{
Thomas Grafc71099a2006-08-04 23:20:06 -07002243 struct rt6_mtu_change_arg arg = {
2244 .dev = dev,
2245 .mtu = mtu,
2246 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002248 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249}
2250
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002251static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002252 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002253 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002254 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002255 [RTA_PRIORITY] = { .type = NLA_U32 },
2256 [RTA_METRICS] = { .type = NLA_NESTED },
2257};
2258
2259static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2260 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261{
Thomas Graf86872cb2006-08-22 00:01:08 -07002262 struct rtmsg *rtm;
2263 struct nlattr *tb[RTA_MAX+1];
2264 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265
Thomas Graf86872cb2006-08-22 00:01:08 -07002266 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2267 if (err < 0)
2268 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269
Thomas Graf86872cb2006-08-22 00:01:08 -07002270 err = -EINVAL;
2271 rtm = nlmsg_data(nlh);
2272 memset(cfg, 0, sizeof(*cfg));
2273
2274 cfg->fc_table = rtm->rtm_table;
2275 cfg->fc_dst_len = rtm->rtm_dst_len;
2276 cfg->fc_src_len = rtm->rtm_src_len;
2277 cfg->fc_flags = RTF_UP;
2278 cfg->fc_protocol = rtm->rtm_protocol;
2279
2280 if (rtm->rtm_type == RTN_UNREACHABLE)
2281 cfg->fc_flags |= RTF_REJECT;
2282
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002283 if (rtm->rtm_type == RTN_LOCAL)
2284 cfg->fc_flags |= RTF_LOCAL;
2285
Thomas Graf86872cb2006-08-22 00:01:08 -07002286 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2287 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002288 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002289
2290 if (tb[RTA_GATEWAY]) {
2291 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2292 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002294
2295 if (tb[RTA_DST]) {
2296 int plen = (rtm->rtm_dst_len + 7) >> 3;
2297
2298 if (nla_len(tb[RTA_DST]) < plen)
2299 goto errout;
2300
2301 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002303
2304 if (tb[RTA_SRC]) {
2305 int plen = (rtm->rtm_src_len + 7) >> 3;
2306
2307 if (nla_len(tb[RTA_SRC]) < plen)
2308 goto errout;
2309
2310 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002312
Daniel Walterc3968a82011-04-13 21:10:57 +00002313 if (tb[RTA_PREFSRC])
2314 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2315
Thomas Graf86872cb2006-08-22 00:01:08 -07002316 if (tb[RTA_OIF])
2317 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2318
2319 if (tb[RTA_PRIORITY])
2320 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2321
2322 if (tb[RTA_METRICS]) {
2323 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2324 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002326
2327 if (tb[RTA_TABLE])
2328 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2329
2330 err = 0;
2331errout:
2332 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333}
2334
Thomas Grafc127ea22007-03-22 11:58:32 -07002335static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336{
Thomas Graf86872cb2006-08-22 00:01:08 -07002337 struct fib6_config cfg;
2338 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339
Thomas Graf86872cb2006-08-22 00:01:08 -07002340 err = rtm_to_fib6_config(skb, nlh, &cfg);
2341 if (err < 0)
2342 return err;
2343
2344 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345}
2346
Thomas Grafc127ea22007-03-22 11:58:32 -07002347static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348{
Thomas Graf86872cb2006-08-22 00:01:08 -07002349 struct fib6_config cfg;
2350 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351
Thomas Graf86872cb2006-08-22 00:01:08 -07002352 err = rtm_to_fib6_config(skb, nlh, &cfg);
2353 if (err < 0)
2354 return err;
2355
2356 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357}
2358
Thomas Graf339bf982006-11-10 14:10:15 -08002359static inline size_t rt6_nlmsg_size(void)
2360{
2361 return NLMSG_ALIGN(sizeof(struct rtmsg))
2362 + nla_total_size(16) /* RTA_SRC */
2363 + nla_total_size(16) /* RTA_DST */
2364 + nla_total_size(16) /* RTA_GATEWAY */
2365 + nla_total_size(16) /* RTA_PREFSRC */
2366 + nla_total_size(4) /* RTA_TABLE */
2367 + nla_total_size(4) /* RTA_IIF */
2368 + nla_total_size(4) /* RTA_OIF */
2369 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002370 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002371 + nla_total_size(sizeof(struct rta_cacheinfo));
2372}
2373
Brian Haley191cd582008-08-14 15:33:21 -07002374static int rt6_fill_node(struct net *net,
2375 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002376 struct in6_addr *dst, struct in6_addr *src,
2377 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002378 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379{
David S. Miller346f8702011-12-29 15:22:33 -05002380 const struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002382 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002383 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002384 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002385 struct neighbour *n;
David S. Miller346f8702011-12-29 15:22:33 -05002386 u32 ts, tsage;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387
2388 if (prefix) { /* user wants prefix routes only */
2389 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390 /* success since this is not a prefix route */
2391 return 1;
2392 }
2393 }
2394
Thomas Graf2d7202b2006-08-22 00:01:27 -07002395 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05002396 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002397 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002398
2399 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 rtm->rtm_family = AF_INET6;
2401 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402 rtm->rtm_src_len = rt->rt6i_src.plen;
2403 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002404 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002405 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002406 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002407 table = RT6_TABLE_UNSPEC;
2408 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002409 NLA_PUT_U32(skb, RTA_TABLE, table);
David S. Miller38308472011-12-03 18:02:47 -05002410 if (rt->rt6i_flags & RTF_REJECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411 rtm->rtm_type = RTN_UNREACHABLE;
David S. Miller38308472011-12-03 18:02:47 -05002412 else if (rt->rt6i_flags & RTF_LOCAL)
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002413 rtm->rtm_type = RTN_LOCAL;
David S. Millerd1918542011-12-28 20:19:20 -05002414 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415 rtm->rtm_type = RTN_LOCAL;
2416 else
2417 rtm->rtm_type = RTN_UNICAST;
2418 rtm->rtm_flags = 0;
2419 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2420 rtm->rtm_protocol = rt->rt6i_protocol;
David S. Miller38308472011-12-03 18:02:47 -05002421 if (rt->rt6i_flags & RTF_DYNAMIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422 rtm->rtm_protocol = RTPROT_REDIRECT;
2423 else if (rt->rt6i_flags & RTF_ADDRCONF)
2424 rtm->rtm_protocol = RTPROT_KERNEL;
David S. Miller38308472011-12-03 18:02:47 -05002425 else if (rt->rt6i_flags & RTF_DEFAULT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 rtm->rtm_protocol = RTPROT_RA;
2427
David S. Miller38308472011-12-03 18:02:47 -05002428 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 rtm->rtm_flags |= RTM_F_CLONED;
2430
2431 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002432 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002433 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002435 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436#ifdef CONFIG_IPV6_SUBTREES
2437 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002438 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002439 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002441 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002443 if (iif) {
2444#ifdef CONFIG_IPV6_MROUTE
2445 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002446 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002447 if (err <= 0) {
2448 if (!nowait) {
2449 if (err == 0)
2450 return 0;
2451 goto nla_put_failure;
2452 } else {
2453 if (err == -EMSGSIZE)
2454 goto nla_put_failure;
2455 }
2456 }
2457 } else
2458#endif
2459 NLA_PUT_U32(skb, RTA_IIF, iif);
2460 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002462 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002463 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002465
Daniel Walterc3968a82011-04-13 21:10:57 +00002466 if (rt->rt6i_prefsrc.plen) {
2467 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002468 saddr_buf = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002469 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2470 }
2471
David S. Millerdefb3512010-12-08 21:16:57 -08002472 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002473 goto nla_put_failure;
2474
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002475 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002476 n = dst_get_neighbour_noref(&rt->dst);
Eric Dumazet94f826b2012-03-27 09:53:52 +00002477 if (n) {
2478 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2479 rcu_read_unlock();
2480 goto nla_put_failure;
2481 }
2482 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002483 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002484
Changli Gaod8d1f302010-06-10 23:31:35 -07002485 if (rt->dst.dev)
David S. Millerd1918542011-12-28 20:19:20 -05002486 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002487
2488 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002489
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002490 if (!(rt->rt6i_flags & RTF_EXPIRES))
2491 expires = 0;
David S. Millerd1918542011-12-28 20:19:20 -05002492 else if (rt->dst.expires - jiffies < INT_MAX)
2493 expires = rt->dst.expires - jiffies;
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002494 else
2495 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002496
David S. Miller346f8702011-12-29 15:22:33 -05002497 peer = rt->rt6i_peer;
2498 ts = tsage = 0;
2499 if (peer && peer->tcp_ts_stamp) {
2500 ts = peer->tcp_ts;
2501 tsage = get_seconds() - peer->tcp_ts_stamp;
2502 }
2503
2504 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
Changli Gaod8d1f302010-06-10 23:31:35 -07002505 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002506 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507
Thomas Graf2d7202b2006-08-22 00:01:27 -07002508 return nlmsg_end(skb, nlh);
2509
2510nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002511 nlmsg_cancel(skb, nlh);
2512 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513}
2514
Patrick McHardy1b43af52006-08-10 23:11:17 -07002515int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516{
2517 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2518 int prefix;
2519
Thomas Graf2d7202b2006-08-22 00:01:27 -07002520 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2521 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2523 } else
2524 prefix = 0;
2525
Brian Haley191cd582008-08-14 15:33:21 -07002526 return rt6_fill_node(arg->net,
2527 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002529 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530}
2531
Thomas Grafc127ea22007-03-22 11:58:32 -07002532static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002534 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002535 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002537 struct sk_buff *skb;
2538 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002539 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002540 int err, iif = 0;
2541
2542 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2543 if (err < 0)
2544 goto errout;
2545
2546 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002547 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002548
2549 if (tb[RTA_SRC]) {
2550 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2551 goto errout;
2552
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002553 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07002554 }
2555
2556 if (tb[RTA_DST]) {
2557 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2558 goto errout;
2559
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002560 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07002561 }
2562
2563 if (tb[RTA_IIF])
2564 iif = nla_get_u32(tb[RTA_IIF]);
2565
2566 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002567 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002568
2569 if (iif) {
2570 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002571 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002572 if (!dev) {
2573 err = -ENODEV;
2574 goto errout;
2575 }
2576 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577
2578 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05002579 if (!skb) {
Thomas Grafab364a62006-08-22 00:01:47 -07002580 err = -ENOBUFS;
2581 goto errout;
2582 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583
2584 /* Reserve room for dummy headers, this skb can pass
2585 through good chunk of routing engine.
2586 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002587 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2589
David S. Miller4c9483b2011-03-12 16:22:43 -05002590 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002591 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592
David S. Miller4c9483b2011-03-12 16:22:43 -05002593 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002595 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002597 kfree_skb(skb);
2598 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599 }
2600
Daniel Lezcano55786892008-03-04 13:47:47 -08002601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002602errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604}
2605
Thomas Graf86872cb2006-08-22 00:01:08 -07002606void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607{
2608 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002609 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002610 u32 seq;
2611 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002613 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002614 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002615
Thomas Graf339bf982006-11-10 14:10:15 -08002616 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05002617 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07002618 goto errout;
2619
Brian Haley191cd582008-08-14 15:33:21 -07002620 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002621 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002622 if (err < 0) {
2623 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624 WARN_ON(err == -EMSGSIZE);
2625 kfree_skb(skb);
2626 goto errout;
2627 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002628 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629 info->nlh, gfp_any());
2630 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002631errout:
2632 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002633 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634}
2635
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002636static int ip6_route_dev_notify(struct notifier_block *this,
2637 unsigned long event, void *data)
2638{
2639 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002640 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002641
2642 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002643 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002644 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002646 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002647 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002648 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002649 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650#endif
2651 }
2652
2653 return NOTIFY_OK;
2654}
2655
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656/*
2657 * /proc
2658 */
2659
2660#ifdef CONFIG_PROC_FS
2661
/*
 * Buffer/offset bookkeeping record.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it looks like a leftover from a pre-seq_file /proc
 * implementation.  Confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2670
2671static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002673 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002674 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002676 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677
2678#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002679 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002680#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002681 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002682#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002683 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002684 n = dst_get_neighbour_noref(&rt->dst);
David S. Miller69cce1d2011-07-17 23:09:49 -07002685 if (n) {
2686 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002687 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002688 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002689 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002690 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002691 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002692 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693 rt->dst.__use, rt->rt6i_flags,
David S. Millerd1918542011-12-28 20:19:20 -05002694 rt->dst.dev ? rt->dst.dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695 return 0;
2696}
2697
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002698static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002699{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002700 struct net *net = (struct net *)m->private;
Josh Hunt32b293a2011-12-28 13:23:07 +00002701 fib6_clean_all_ro(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002702 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703}
2704
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002705static int ipv6_route_open(struct inode *inode, struct file *file)
2706{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002707 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002708}
2709
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002710static const struct file_operations ipv6_route_proc_fops = {
2711 .owner = THIS_MODULE,
2712 .open = ipv6_route_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002715 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002716};
2717
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002720 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002722 net->ipv6.rt6_stats->fib_nodes,
2723 net->ipv6.rt6_stats->fib_route_nodes,
2724 net->ipv6.rt6_stats->fib_rt_alloc,
2725 net->ipv6.rt6_stats->fib_rt_entries,
2726 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002727 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002728 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729
2730 return 0;
2731}
2732
2733static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002735 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002736}
2737
Arjan van de Ven9a321442007-02-12 00:55:35 -08002738static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739 .owner = THIS_MODULE,
2740 .open = rt6_stats_seq_open,
2741 .read = seq_read,
2742 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002743 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744};
2745#endif /* CONFIG_PROC_FS */
2746
2747#ifdef CONFIG_SYSCTL
2748
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002750int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 void __user *buffer, size_t *lenp, loff_t *ppos)
2752{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002753 struct net *net;
2754 int delay;
2755 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002757
2758 net = (struct net *)ctl->extra1;
2759 delay = net->ipv6.sysctl.flush_delay;
2760 proc_dointvec(ctl, write, buffer, lenp, ppos);
2761 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763}
2764
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002765ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002766 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002768 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002769 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002770 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002771 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 },
2773 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002774 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002775 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002776 .maxlen = sizeof(int),
2777 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002778 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002779 },
2780 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002781 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002782 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002783 .maxlen = sizeof(int),
2784 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002785 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002786 },
2787 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002789 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790 .maxlen = sizeof(int),
2791 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002792 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002793 },
2794 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002796 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002797 .maxlen = sizeof(int),
2798 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002799 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002800 },
2801 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002802 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804 .maxlen = sizeof(int),
2805 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002806 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807 },
2808 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002809 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002810 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002811 .maxlen = sizeof(int),
2812 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002813 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002814 },
2815 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002817 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002818 .maxlen = sizeof(int),
2819 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002820 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002821 },
2822 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002823 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002824 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002825 .maxlen = sizeof(int),
2826 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002827 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002828 },
2829 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002830 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002831 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002832 .maxlen = sizeof(int),
2833 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002834 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002835 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002836 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002837};
2838
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002839struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002840{
2841 struct ctl_table *table;
2842
2843 table = kmemdup(ipv6_route_table_template,
2844 sizeof(ipv6_route_table_template),
2845 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002846
2847 if (table) {
2848 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002849 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002850 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002851 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002858 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002859 }
2860
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002861 return table;
2862}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002863#endif
2864
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002865static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002866{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002867 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002868
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002869 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002871
Eric Dumazetfc66f952010-10-08 06:37:34 +00002872 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873 goto out_ip6_dst_ops;
2874
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002875 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876 sizeof(*net->ipv6.ip6_null_entry),
2877 GFP_KERNEL);
2878 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002879 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002880 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002881 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002882 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002883 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002885
2886#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888 sizeof(*net->ipv6.ip6_prohibit_entry),
2889 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002890 if (!net->ipv6.ip6_prohibit_entry)
2891 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002892 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002893 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002894 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002895 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002897
2898 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899 sizeof(*net->ipv6.ip6_blk_hole_entry),
2900 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002901 if (!net->ipv6.ip6_blk_hole_entry)
2902 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002903 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002904 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002905 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002906 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002908#endif
2909
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002910 net->ipv6.sysctl.flush_delay = 0;
2911 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002919#ifdef CONFIG_PROC_FS
2920 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2921 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2922#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002923 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2924
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002925 ret = 0;
2926out:
2927 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002928
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002929#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2930out_ip6_prohibit_entry:
2931 kfree(net->ipv6.ip6_prohibit_entry);
2932out_ip6_null_entry:
2933 kfree(net->ipv6.ip6_null_entry);
2934#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002935out_ip6_dst_entries:
2936 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002937out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002938 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002939}
2940
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002941static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002942{
2943#ifdef CONFIG_PROC_FS
2944 proc_net_remove(net, "ipv6_route");
2945 proc_net_remove(net, "rt6_stats");
2946#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002947 kfree(net->ipv6.ip6_null_entry);
2948#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2949 kfree(net->ipv6.ip6_prohibit_entry);
2950 kfree(net->ipv6.ip6_blk_hole_entry);
2951#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002952 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002953}
2954
2955static struct pernet_operations ip6_route_net_ops = {
2956 .init = ip6_route_net_init,
2957 .exit = ip6_route_net_exit,
2958};
2959
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002960static struct notifier_block ip6_route_dev_notifier = {
2961 .notifier_call = ip6_route_dev_notify,
2962 .priority = 0,
2963};
2964
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002965int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002966{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002967 int ret;
2968
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002969 ret = -ENOMEM;
2970 ip6_dst_ops_template.kmem_cachep =
2971 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2972 SLAB_HWCACHE_ALIGN, NULL);
2973 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002974 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002975
Eric Dumazetfc66f952010-10-08 06:37:34 +00002976 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002977 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002978 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002979
Eric Dumazetfc66f952010-10-08 06:37:34 +00002980 ret = register_pernet_subsys(&ip6_route_net_ops);
2981 if (ret)
2982 goto out_dst_entries;
2983
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002984 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2985
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002986 /* Registering of the loopback is done before this portion of code,
2987 * the loopback reference in rt6_info will not be taken, do it
2988 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002989 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002990 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2991 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002992 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002993 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002994 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002995 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2996 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002997 ret = fib6_init();
2998 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002999 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003000
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003001 ret = xfrm6_init();
3002 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003003 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08003004
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003005 ret = fib6_rules_init();
3006 if (ret)
3007 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08003008
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003009 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00003010 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3011 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3012 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003013 goto fib6_rules_init;
3014
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003015 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003016 if (ret)
3017 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003018
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003019out:
3020 return ret;
3021
3022fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003023 fib6_rules_cleanup();
3024xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003025 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003026out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003027 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003028out_register_subsys:
3029 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00003030out_dst_entries:
3031 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003032out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003033 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003034 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003035}
3036
3037void ip6_route_cleanup(void)
3038{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003039 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07003040 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003041 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003042 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003043 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00003044 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003045 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003046}