blob: fb545edef6ea63e235d6ef161fb057bc0face6dc [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at level 3 or
 * above RDBG() and RT6_TRACE() expand to printk() calls, below that they
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Eric Dumazet21efcfa2011-07-19 20:18:36 +000075static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080078static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080079static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080084static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080092static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000093 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080095 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080096static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000097 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080099#endif
100
David S. Miller06582542011-01-27 14:58:42 -0800101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000107 if (!(rt->dst.flags & DST_HOST))
108 return NULL;
109
David S. Miller06582542011-01-27 14:58:42 -0800110 if (!rt->rt6i_peer)
111 rt6_bind_peer(rt, 1);
112
113 peer = rt->rt6i_peer;
114 if (peer) {
115 u32 *old_p = __DST_METRICS_PTR(old);
116 unsigned long prev, new;
117
118 p = peer->metrics;
119 if (inet_metrics_new(peer))
120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122 new = (unsigned long) p;
123 prev = cmpxchg(&dst->_metrics, old, new);
124
125 if (prev != old) {
126 p = __DST_METRICS_PTR(prev);
127 if (prev & DST_METRICS_READ_ONLY)
128 p = NULL;
129 }
130 }
131 return p;
132}
133
David S. Millerd3aaeb32011-07-18 00:40:17 -0700134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135{
136 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
137}
138
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800139static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800141 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 .gc = ip6_dst_gc,
143 .gc_thresh = 1024,
144 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800145 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800146 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800147 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 .destroy = ip6_dst_destroy,
149 .ifdown = ip6_dst_ifdown,
150 .negative_advice = ip6_negative_advice,
151 .link_failure = ip6_link_failure,
152 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700153 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700154 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155};
156
/* default_mtu callback for blackhole dst entries: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
161
David S. Miller14e50e52007-05-24 18:17:54 -0700162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
163{
164}
165
Held Bernhard0972ddb2011-04-24 22:07:32 +0000166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
167 unsigned long old)
168{
169 return NULL;
170}
171
David S. Miller14e50e52007-05-24 18:17:54 -0700172static struct dst_ops ip6_dst_blackhole_ops = {
173 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800174 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700175 .destroy = ip6_dst_destroy,
176 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800177 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800178 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700179 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000180 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700181 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700182};
183
David S. Miller62fa8a82011-01-26 20:51:05 -0800184static const u32 ip6_template_metrics[RTAX_MAX] = {
185 [RTAX_HOPLIMIT - 1] = 255,
186};
187
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800188static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700189 .dst = {
190 .__refcnt = ATOMIC_INIT(1),
191 .__use = 1,
192 .obsolete = -1,
193 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700194 .input = ip6_pkt_discard,
195 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 },
197 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700198 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 .rt6i_metric = ~(u32) 0,
200 .rt6i_ref = ATOMIC_INIT(1),
201};
202
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * carries -EACCES and whose handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * carries -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
};

#endif
239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700242 struct net_device *dev,
243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
David S. Miller957c6652011-06-24 15:25:00 -0700245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700246
Madalin Bucurfbe58182011-09-26 07:04:56 +0000247 if (rt != NULL)
248 memset(&rt->rt6i_table, 0,
249 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700250
251 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252}
253
254static void ip6_dst_destroy(struct dst_entry *dst)
255{
256 struct rt6_info *rt = (struct rt6_info *)dst;
257 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800258 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000260 if (!(rt->dst.flags & DST_HOST))
261 dst_destroy_metrics_generic(dst);
262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 if (idev != NULL) {
264 rt->rt6i_idev = NULL;
265 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900266 }
David S. Millerb3419362010-11-30 12:27:11 -0800267 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800268 rt->rt6i_peer = NULL;
269 inet_putpeer(peer);
270 }
271}
272
David S. Miller6431cbc2011-02-07 20:38:06 -0800273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
274
275static u32 rt6_peer_genid(void)
276{
277 return atomic_read(&__rt6_peer_genid);
278}
279
David S. Millerb3419362010-11-30 12:27:11 -0800280void rt6_bind_peer(struct rt6_info *rt, int create)
281{
282 struct inet_peer *peer;
283
David S. Millerb3419362010-11-30 12:27:11 -0800284 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
285 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
286 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800287 else
288 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289}
290
291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
292 int how)
293{
294 struct rt6_info *rt = (struct rt6_info *)dst;
295 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800296 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900297 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800299 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
300 struct inet6_dev *loopback_idev =
301 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 if (loopback_idev != NULL) {
303 rt->rt6i_idev = loopback_idev;
304 in6_dev_put(idev);
305 }
306 }
307}
308
309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
310{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000311 return (rt->rt6i_flags & RTF_EXPIRES) &&
312 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313}
314
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000315static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700316{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000317 return ipv6_addr_type(daddr) &
318 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700319}
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700322 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
324
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800325static inline struct rt6_info *rt6_device_match(struct net *net,
326 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000327 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700329 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330{
331 struct rt6_info *local = NULL;
332 struct rt6_info *sprt;
333
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900334 if (!oif && ipv6_addr_any(saddr))
335 goto out;
336
Changli Gaod8d1f302010-06-10 23:31:35 -0700337 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900338 struct net_device *dev = sprt->rt6i_dev;
339
340 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 if (dev->ifindex == oif)
342 return sprt;
343 if (dev->flags & IFF_LOOPBACK) {
344 if (sprt->rt6i_idev == NULL ||
345 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700346 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900348 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 local->rt6i_idev->dev->ifindex == oif))
350 continue;
351 }
352 local = sprt;
353 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900354 } else {
355 if (ipv6_chk_addr(net, saddr, dev,
356 flags & RT6_LOOKUP_F_IFACE))
357 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900359 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900361 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 if (local)
363 return local;
364
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700365 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800366 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900368out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 return rt;
370}
371
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a neighbour entry that is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval, stamp the
 * entry with the current time and send a Neighbour Solicitation for the
 * neighbour's address to its solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing is done.
 *
 * The neighbour is looked up under rcu_read_lock().  neigh->updated is
 * modified here, so the neighbour's lock is taken for writing; the
 * solicitation itself is sent after the lock has been dropped.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Write lock: neigh->updated is changed below. */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800413 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700418 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800419 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700420 if ((dev->flags & IFF_LOOPBACK) &&
421 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
422 return 1;
423 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424}
425
Dave Jonesb6f99a22007-03-22 12:27:49 -0700426static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000428 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800429 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000430
431 rcu_read_lock();
432 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700433 if (rt->rt6i_flags & RTF_NONEXTHOP ||
434 !(rt->rt6i_flags & RTF_GATEWAY))
435 m = 1;
436 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800437 read_lock_bh(&neigh->lock);
438 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700439 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800440#ifdef CONFIG_IPV6_ROUTER_PREF
441 else if (neigh->nud_state & NUD_FAILED)
442 m = 0;
443#endif
444 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800445 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800446 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800447 } else
448 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000449 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800450 return m;
451}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800453static int rt6_score_route(struct rt6_info *rt, int oif,
454 int strict)
455{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700456 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900457
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700458 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700459 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800460 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800461#ifdef CONFIG_IPV6_ROUTER_PREF
462 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
463#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700464 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800465 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800466 return -1;
467 return m;
468}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469
David S. Millerf11e6652007-03-24 20:36:25 -0700470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
471 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800472{
David S. Millerf11e6652007-03-24 20:36:25 -0700473 int m;
474
475 if (rt6_check_expired(rt))
476 goto out;
477
478 m = rt6_score_route(rt, oif, strict);
479 if (m < 0)
480 goto out;
481
482 if (m > *mpri) {
483 if (strict & RT6_LOOKUP_F_REACHABLE)
484 rt6_probe(match);
485 *mpri = m;
486 match = rt;
487 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
488 rt6_probe(rt);
489 }
490
491out:
492 return match;
493}
494
495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
496 struct rt6_info *rr_head,
497 u32 metric, int oif, int strict)
498{
499 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800500 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
David S. Millerf11e6652007-03-24 20:36:25 -0700502 match = NULL;
503 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700504 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700505 match = find_match(rt, oif, strict, &mpri, match);
506 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700507 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700508 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800509
David S. Millerf11e6652007-03-24 20:36:25 -0700510 return match;
511}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800512
David S. Millerf11e6652007-03-24 20:36:25 -0700513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
514{
515 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800516 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
David S. Millerf11e6652007-03-24 20:36:25 -0700518 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800519 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
David S. Millerf11e6652007-03-24 20:36:25 -0700521 rt0 = fn->rr_ptr;
522 if (!rt0)
523 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524
David S. Millerf11e6652007-03-24 20:36:25 -0700525 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800527 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700528 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700529 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700530
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800531 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700532 if (!next || next->rt6i_metric != rt0->rt6i_metric)
533 next = fn->leaf;
534
535 if (next != rt0)
536 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 }
538
David S. Millerf11e6652007-03-24 20:36:25 -0700539 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800540 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900542 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000543 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544}
545
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt points at the option and @len is its length in bytes.  The option
 * is rejected with -EINVAL when it is shorter than struct route_info,
 * when its length field exceeds 3, when the prefix length exceeds 128,
 * when the length field is too small for the stated prefix length, or
 * when the preference is ICMPV6_ROUTER_PREF_INVALID.
 *
 * Otherwise the matching route is looked up and:
 *   - deleted if the advertised lifetime is zero;
 *   - created if it does not exist and the lifetime is non-zero;
 *   - updated in place (RTF_ROUTEINFO and preference bits) if it exists.
 * The resulting route gets its expiry set from the lifetime, or
 * RTF_EXPIRES cleared for a non-finite lifetime, and is released with
 * dst_release().
 *
 * Returns 0 on success.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	struct rt6_info *rt;
	unsigned long lifetime;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
619
/*
 * BACKTRACK() - step back up the FIB tree when a lookup hit the null entry.
 *
 * Expects the expanding function to provide the variables "rt" and "fn"
 * and the labels "restart" and "out".  If "rt" is the null entry of
 * @__net, "fn" is moved towards the root: to the parent, or into the
 * parent's subtree (looked up by @saddr) when the parent has one that is
 * not "fn" itself.  Control jumps to "restart" at the first node flagged
 * RTN_RTINFO, and to "out" once "fn" is flagged RTN_TL_ROOT.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (!FIB6_SUBTREE(pn) ||			\
			    FIB6_SUBTREE(pn) == fn)			\
				fn = pn;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700637
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
639 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500640 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641{
642 struct fib6_node *fn;
643 struct rt6_info *rt;
644
Thomas Grafc71099a2006-08-04 23:20:06 -0700645 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500646 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700647restart:
648 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500649 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
650 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700651out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700652 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700653 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 return rt;
655
656}
657
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
659 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700660{
David S. Miller4c9483b2011-03-12 16:22:43 -0500661 struct flowi6 fl6 = {
662 .flowi6_oif = oif,
663 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700664 };
665 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700666 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700667
Thomas Grafadaa70b2006-10-13 15:01:03 -0700668 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500669 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700670 flags |= RT6_LOOKUP_F_HAS_SADDR;
671 }
672
David S. Miller4c9483b2011-03-12 16:22:43 -0500673 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 if (dst->error == 0)
675 return (struct rt6_info *) dst;
676
677 dst_release(dst);
678
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 return NULL;
680}
681
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900682EXPORT_SYMBOL(rt6_lookup);
683
Thomas Grafc71099a2006-08-04 23:20:06 -0700684/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 It takes new route entry, the addition fails by any reason the
686 route is freed. In any case, if caller does not hold it, it may
687 be destroyed.
688 */
689
Thomas Graf86872cb2006-08-22 00:01:08 -0700690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691{
692 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700693 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
Thomas Grafc71099a2006-08-04 23:20:06 -0700695 table = rt->rt6i_table;
696 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700697 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700698 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 return err;
701}
702
Thomas Graf40e22e82006-08-22 00:00:45 -0700703int ip6_ins_rt(struct rt6_info *rt)
704{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800705 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900706 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800707 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800708 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700709}
710
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
712 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000713 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 struct rt6_info *rt;
716
717 /*
718 * Clone the route.
719 */
720
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000721 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722
723 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800724 struct neighbour *neigh;
725 int attempts = !in_softirq();
726
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900727 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
728 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000729 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900730 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900732 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736#ifdef CONFIG_IPV6_SUBTREES
737 if (rt->rt6i_src.plen && saddr) {
738 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
739 rt->rt6i_src.plen = 128;
740 }
741#endif
742
David S. Miller14deae42009-01-04 16:04:39 -0800743 retry:
744 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
745 if (IS_ERR(neigh)) {
746 struct net *net = dev_net(rt->rt6i_dev);
747 int saved_rt_min_interval =
748 net->ipv6.sysctl.ip6_rt_gc_min_interval;
749 int saved_rt_elasticity =
750 net->ipv6.sysctl.ip6_rt_gc_elasticity;
751
752 if (attempts-- > 0) {
753 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
754 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
755
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000756 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800757
758 net->ipv6.sysctl.ip6_rt_gc_elasticity =
759 saved_rt_elasticity;
760 net->ipv6.sysctl.ip6_rt_gc_min_interval =
761 saved_rt_min_interval;
762 goto retry;
763 }
764
765 if (net_ratelimit())
766 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700767 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700768 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800769 return NULL;
770 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700771 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800773 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800775 return rt;
776}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
779 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800780{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000781 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
782
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800783 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000785 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800786 }
787 return rt;
788}
789
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500791 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792{
793 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800794 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700795 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800797 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700798 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700800 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801
802relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700803 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800805restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500806 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807
808restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700809 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800810
David S. Miller4c9483b2011-03-12 16:22:43 -0500811 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800812 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800813 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800814 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
Changli Gaod8d1f302010-06-10 23:31:35 -0700816 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700817 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800818
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000819 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500820 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800821 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500822 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800823 else
824 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800825
Changli Gaod8d1f302010-06-10 23:31:35 -0700826 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800827 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800828
Changli Gaod8d1f302010-06-10 23:31:35 -0700829 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800830 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700831 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800832 if (!err)
833 goto out2;
834 }
835
836 if (--attempts <= 0)
837 goto out2;
838
839 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700840 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800841 * released someone could insert this route. Relookup.
842 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700843 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800844 goto relookup;
845
846out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800847 if (reachable) {
848 reachable = 0;
849 goto restart_2;
850 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700851 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700852 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700854 rt->dst.lastuse = jiffies;
855 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700856
857 return rt;
858}
859
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500861 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700862{
David S. Miller4c9483b2011-03-12 16:22:43 -0500863 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700864}
865
Thomas Grafc71099a2006-08-04 23:20:06 -0700866void ip6_route_input(struct sk_buff *skb)
867{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000868 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900869 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700870 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500871 struct flowi6 fl6 = {
872 .flowi6_iif = skb->dev->ifindex,
873 .daddr = iph->daddr,
874 .saddr = iph->saddr,
875 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
876 .flowi6_mark = skb->mark,
877 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700878 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700879
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800880 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700881 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700882
David S. Miller4c9483b2011-03-12 16:22:43 -0500883 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700884}
885
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500887 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700888{
David S. Miller4c9483b2011-03-12 16:22:43 -0500889 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700890}
891
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500893 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700894{
895 int flags = 0;
896
David S. Miller4c9483b2011-03-12 16:22:43 -0500897 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700898 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700899
David S. Miller4c9483b2011-03-12 16:22:43 -0500900 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700901 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000902 else if (sk)
903 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700904
David S. Miller4c9483b2011-03-12 16:22:43 -0500905 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906}
907
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900908EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909
David S. Miller2774c132011-03-01 14:59:04 -0800910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700911{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700912 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700913 struct dst_entry *new = NULL;
914
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700915 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700916 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700917 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
918
Changli Gaod8d1f302010-06-10 23:31:35 -0700919 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700920
David S. Miller14e50e52007-05-24 18:17:54 -0700921 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800922 new->input = dst_discard;
923 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700924
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000925 if (dst_metrics_read_only(&ort->dst))
926 new->_metrics = ort->dst._metrics;
927 else
928 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700929 rt->rt6i_idev = ort->rt6i_idev;
930 if (rt->rt6i_idev)
931 in6_dev_hold(rt->rt6i_idev);
932 rt->rt6i_expires = 0;
933
934 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
935 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
936 rt->rt6i_metric = 0;
937
938 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
939#ifdef CONFIG_IPV6_SUBTREES
940 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
941#endif
942
943 dst_free(new);
944 }
945
David S. Miller69ead7a2011-03-01 14:45:33 -0800946 dst_release(dst_orig);
947 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700948}
David S. Miller14e50e52007-05-24 18:17:54 -0700949
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950/*
951 * Destination cache support functions
952 */
953
954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
955{
956 struct rt6_info *rt;
957
958 rt = (struct rt6_info *) dst;
959
David S. Miller6431cbc2011-02-07 20:38:06 -0800960 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
961 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
962 if (!rt->rt6i_peer)
963 rt6_bind_peer(rt, 0);
964 rt->rt6i_peer_genid = rt6_peer_genid();
965 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800967 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 return NULL;
969}
970
971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
972{
973 struct rt6_info *rt = (struct rt6_info *) dst;
974
975 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000976 if (rt->rt6i_flags & RTF_CACHE) {
977 if (rt6_check_expired(rt)) {
978 ip6_del_rt(rt);
979 dst = NULL;
980 }
981 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000983 dst = NULL;
984 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000986 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987}
988
989static void ip6_link_failure(struct sk_buff *skb)
990{
991 struct rt6_info *rt;
992
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000993 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
Eric Dumazetadf30902009-06-02 05:19:30 +0000995 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 if (rt) {
997 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700998 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 rt->rt6i_flags |= RTF_EXPIRES;
1000 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001 rt->rt6i_node->fn_sernum = -1;
1002 }
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007 struct rt6_info *rt6 = (struct rt6_info*)dst;
1008
1009 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010 rt6->rt6i_flags |= RTF_MODIFIED;
1011 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001012 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001014 features |= RTAX_FEATURE_ALLFRAG;
1015 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 }
David S. Millerdefb3512010-12-08 21:16:57 -08001017 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 }
1019}
1020
David S. Miller0dbaee32010-12-13 12:52:14 -08001021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022{
David S. Miller0dbaee32010-12-13 12:52:14 -08001023 struct net_device *dev = dst->dev;
1024 unsigned int mtu = dst_mtu(dst);
1025 struct net *net = dev_net(dev);
1026
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
Daniel Lezcano55786892008-03-04 13:47:47 -08001029 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
1032 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001033 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 * rely only on pmtu discovery"
1037 */
1038 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039 mtu = IPV6_MAXPLEN;
1040 return mtu;
1041}
1042
David S. Millerd33e4552010-12-14 13:01:14 -08001043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045 unsigned int mtu = IPV6_MIN_MTU;
1046 struct inet6_dev *idev;
1047
1048 rcu_read_lock();
1049 idev = __in6_dev_get(dst->dev);
1050 if (idev)
1051 mtu = idev->cnf.mtu6;
1052 rcu_read_unlock();
1053
1054 return mtu;
1055}
1056
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001057static struct dst_entry *icmp6_dst_gc_list;
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001059
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001062 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063{
1064 struct rt6_info *rt;
1065 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001066 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
1068 if (unlikely(idev == NULL))
1069 return NULL;
1070
David S. Miller957c6652011-06-24 15:25:00 -07001071 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 if (unlikely(rt == NULL)) {
1073 in6_dev_put(idev);
1074 goto out;
1075 }
1076
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 if (neigh)
1078 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001079 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001081 if (IS_ERR(neigh))
1082 neigh = NULL;
1083 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001085 rt->dst.flags |= DST_HOST;
1086 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001087 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001088 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001089 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001090
1091 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092 rt->rt6i_dst.plen = 128;
1093 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001095 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001096 rt->dst.next = icmp6_dst_gc_list;
1097 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001098 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Daniel Lezcano55786892008-03-04 13:47:47 -08001100 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001103 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104}
1105
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001106int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001108 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001109 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001111 spin_lock_bh(&icmp6_dst_lock);
1112 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001113
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 while ((dst = *pprev) != NULL) {
1115 if (!atomic_read(&dst->__refcnt)) {
1116 *pprev = dst->next;
1117 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 } else {
1119 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001120 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 }
1122 }
1123
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001124 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001125
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001126 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127}
1128
David S. Miller1e493d12008-09-10 17:27:15 -07001129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 void *arg)
1131{
1132 struct dst_entry *dst, **pprev;
1133
1134 spin_lock_bh(&icmp6_dst_lock);
1135 pprev = &icmp6_dst_gc_list;
1136 while ((dst = *pprev) != NULL) {
1137 struct rt6_info *rt = (struct rt6_info *) dst;
1138 if (func(rt, arg)) {
1139 *pprev = dst->next;
1140 dst_free(dst);
1141 } else {
1142 pprev = &dst->next;
1143 }
1144 }
1145 spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
Daniel Lezcano569d3642008-01-18 03:56:57 -08001148static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001151 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001152 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001157 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
Eric Dumazetfc66f952010-10-08 06:37:34 +00001159 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001160 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001161 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 goto out;
1163
Benjamin Thery6891a342008-03-04 13:49:47 -08001164 net->ipv6.ip6_rt_gc_expire++;
1165 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001167 entries = dst_entries_get_slow(ops);
1168 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001169 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001171 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001172 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176 but results in cleaner routing tables.
1177
1178 Remove it only when all the things will work!
1179 */
1180
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001181int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182{
David S. Miller5170ae82010-12-12 21:35:57 -08001183 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001184 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001185 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001186 struct inet6_dev *idev;
1187
1188 rcu_read_lock();
1189 idev = __in6_dev_get(dev);
1190 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001191 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001192 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001193 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001194 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 }
1196 return hoplimit;
1197}
David S. Millerabbf46a2010-12-12 21:14:46 -08001198EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199
1200/*
1201 *
1202 */
1203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205{
1206 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001207 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 struct rt6_info *rt = NULL;
1209 struct net_device *dev = NULL;
1210 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001211 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 int addr_type;
1213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 return -EINVAL;
1219#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001222 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 if (!dev)
1224 goto out;
1225 idev = in6_dev_get(dev);
1226 if (!idev)
1227 goto out;
1228 }
1229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230 if (cfg->fc_metric == 0)
1231 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232
Daniel Lezcano55786892008-03-04 13:47:47 -08001233 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001234 if (table == NULL) {
1235 err = -ENOBUFS;
1236 goto out;
1237 }
1238
David S. Miller957c6652011-06-24 15:25:00 -07001239 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
1241 if (rt == NULL) {
1242 err = -ENOMEM;
1243 goto out;
1244 }
1245
Changli Gaod8d1f302010-06-10 23:31:35 -07001246 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001247 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
Thomas Graf86872cb2006-08-22 00:01:08 -07001251 if (cfg->fc_protocol == RTPROT_UNSPEC)
1252 cfg->fc_protocol = RTPROT_BOOT;
1253 rt->rt6i_protocol = cfg->fc_protocol;
1254
1255 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001258 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001259 else if (cfg->fc_flags & RTF_LOCAL)
1260 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001262 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Changli Gaod8d1f302010-06-10 23:31:35 -07001264 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
Thomas Graf86872cb2006-08-22 00:01:08 -07001266 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001269 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001271 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273 if (!metrics) {
1274 err = -ENOMEM;
1275 goto out;
1276 }
1277 dst_init_metrics(&rt->dst, metrics, 0);
1278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282#endif
1283
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
1286 /* We cannot add true routes via loopback here,
1287 they would result in kernel looping; promote them to reject routes
1288 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001290 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001293 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 if (dev) {
1295 dev_put(dev);
1296 in6_dev_put(idev);
1297 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001298 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 dev_hold(dev);
1300 idev = in6_dev_get(dev);
1301 if (!idev) {
1302 err = -ENODEV;
1303 goto out;
1304 }
1305 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001306 rt->dst.output = ip6_pkt_discard_out;
1307 rt->dst.input = ip6_pkt_discard;
1308 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310 goto install_route;
1311 }
1312
Thomas Graf86872cb2006-08-22 00:01:08 -07001313 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001314 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 int gwa_type;
1316
Thomas Graf86872cb2006-08-22 00:01:08 -07001317 gw_addr = &cfg->fc_gateway;
1318 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 gwa_type = ipv6_addr_type(gw_addr);
1320
1321 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322 struct rt6_info *grt;
1323
1324 /* IPv6 strictly inhibits using not link-local
1325 addresses as nexthop address.
1326 Otherwise, router will not able to send redirects.
1327 It is very good, but in some (rare!) circumstances
1328 (SIT, PtP, NBMA NOARP links) it is handy to allow
1329 some exceptions. --ANK
1330 */
1331 err = -EINVAL;
1332 if (!(gwa_type&IPV6_ADDR_UNICAST))
1333 goto out;
1334
Daniel Lezcano55786892008-03-04 13:47:47 -08001335 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337 err = -EHOSTUNREACH;
1338 if (grt == NULL)
1339 goto out;
1340 if (dev) {
1341 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001342 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 goto out;
1344 }
1345 } else {
1346 dev = grt->rt6i_dev;
1347 idev = grt->rt6i_idev;
1348 dev_hold(dev);
1349 in6_dev_hold(grt->rt6i_idev);
1350 }
1351 if (!(grt->rt6i_flags&RTF_GATEWAY))
1352 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001353 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
1355 if (err)
1356 goto out;
1357 }
1358 err = -EINVAL;
1359 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360 goto out;
1361 }
1362
1363 err = -ENODEV;
1364 if (dev == NULL)
1365 goto out;
1366
Daniel Walterc3968a82011-04-13 21:10:57 +00001367 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369 err = -EINVAL;
1370 goto out;
1371 }
1372 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373 rt->rt6i_prefsrc.plen = 128;
1374 } else
1375 rt->rt6i_prefsrc.plen = 0;
1376
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001378 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379 if (IS_ERR(n)) {
1380 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 goto out;
1382 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001383 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 }
1385
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
1388install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001389 if (cfg->fc_mx) {
1390 struct nlattr *nla;
1391 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
Thomas Graf86872cb2006-08-22 00:01:08 -07001393 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001394 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001395
1396 if (type) {
1397 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 err = -EINVAL;
1399 goto out;
1400 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001401
David S. Millerdefb3512010-12-08 21:16:57 -08001402 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 }
1405 }
1406
Changli Gaod8d1f302010-06-10 23:31:35 -07001407 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001409 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001410
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001411 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001412
Thomas Graf86872cb2006-08-22 00:01:08 -07001413 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415out:
1416 if (dev)
1417 dev_put(dev);
1418 if (idev)
1419 in6_dev_put(idev);
1420 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001421 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 return err;
1423}
1424
Thomas Graf86872cb2006-08-22 00:01:08 -07001425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426{
1427 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001428 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001429 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001431 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001432 return -ENOENT;
1433
Thomas Grafc71099a2006-08-04 23:20:06 -07001434 table = rt->rt6i_table;
1435 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
Thomas Graf86872cb2006-08-22 00:01:08 -07001437 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001438 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
Thomas Grafc71099a2006-08-04 23:20:06 -07001440 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
1442 return err;
1443}
1444
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001445int ip6_del_rt(struct rt6_info *rt)
1446{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001447 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001448 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001449 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001450 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001451}
1452
Thomas Graf86872cb2006-08-22 00:01:08 -07001453static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454{
Thomas Grafc71099a2006-08-04 23:20:06 -07001455 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 struct fib6_node *fn;
1457 struct rt6_info *rt;
1458 int err = -ESRCH;
1459
Daniel Lezcano55786892008-03-04 13:47:47 -08001460 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001461 if (table == NULL)
1462 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463
Thomas Grafc71099a2006-08-04 23:20:06 -07001464 read_lock_bh(&table->tb6_lock);
1465
1466 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001467 &cfg->fc_dst, cfg->fc_dst_len,
1468 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001469
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001471 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001472 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001474 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001476 if (cfg->fc_flags & RTF_GATEWAY &&
1477 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001479 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001481 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001482 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483
Thomas Graf86872cb2006-08-22 00:01:08 -07001484 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 }
1486 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001487 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488
1489 return err;
1490}
1491
1492/*
1493 * Handle redirects
1494 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001495struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001496 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001497 struct in6_addr gateway;
1498};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001502 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001503 int flags)
1504{
David S. Miller4c9483b2011-03-12 16:22:43 -05001505 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001506 struct rt6_info *rt;
1507 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001508
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001510 * Get the "current" route for this destination and
1511 * check if the redirect has come from approriate router.
1512 *
1513 * RFC 2461 specifies that redirects should only be
1514 * accepted if they come from the nexthop to the target.
1515 * Due to the way the routes are chosen, this notion
1516 * is a bit fuzzy and one might need to check all possible
1517 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
Thomas Grafc71099a2006-08-04 23:20:06 -07001520 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001521 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001522restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001523 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001524 /*
1525 * Current route is on-link; redirect is always invalid.
1526 *
1527 * Seems, previous statement is not true. It could
1528 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529 * But then router serving it might decide, that we should
1530 * know truth 8)8) --ANK (980726).
1531 */
1532 if (rt6_check_expired(rt))
1533 continue;
1534 if (!(rt->rt6i_flags & RTF_GATEWAY))
1535 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001536 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001537 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001538 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001539 continue;
1540 break;
1541 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001542
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001543 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001544 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001545 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001546out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001547 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001548
1549 read_unlock_bh(&table->tb6_lock);
1550
1551 return rt;
1552};
1553
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555 const struct in6_addr *src,
1556 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001557 struct net_device *dev)
1558{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001559 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001560 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001561 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001562 .fl6 = {
1563 .flowi6_oif = dev->ifindex,
1564 .daddr = *dest,
1565 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001566 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001567 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001568
Brian Haley86c36ce2009-10-07 13:58:01 -07001569 ipv6_addr_copy(&rdfl.gateway, gateway);
1570
Thomas Grafadaa70b2006-10-13 15:01:03 -07001571 if (rt6_need_strict(dest))
1572 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001573
David S. Miller4c9483b2011-03-12 16:22:43 -05001574 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001575 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001576}
1577
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001580 struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582 struct rt6_info *rt, *nrt = NULL;
1583 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001584 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001585
1586 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001588 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 if (net_ratelimit())
1590 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001592 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 }
1594
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 /*
1596 * We have finally decided to accept it.
1597 */
1598
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001599 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601 NEIGH_UPDATE_F_OVERRIDE|
1602 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603 NEIGH_UPDATE_F_ISROUTER))
1604 );
1605
1606 /*
1607 * Redirect received -> path was valid.
1608 * Look, redirects are sent only in response to data packets,
1609 * so that this nexthop apparently is reachable. --ANK
1610 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001611 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001614 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 goto out;
1616
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001617 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 if (nrt == NULL)
1619 goto out;
1620
1621 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622 if (on_link)
1623 nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001626 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627
Thomas Graf40e22e82006-08-22 00:00:45 -07001628 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 goto out;
1630
Changli Gaod8d1f302010-06-10 23:31:35 -07001631 netevent.old = &rt->dst;
1632 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001633 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001636 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 return;
1638 }
1639
1640out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001641 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642}
1643
1644/*
1645 * Handle ICMP "packet too big" messages
1646 * i.e. Path MTU discovery
1647 */
1648
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001650 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651{
1652 struct rt6_info *rt, *nrt;
1653 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001654again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001655 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 if (rt == NULL)
1657 return;
1658
Andrey Vagind3052b52010-12-11 15:20:11 +00001659 if (rt6_check_expired(rt)) {
1660 ip6_del_rt(rt);
1661 goto again;
1662 }
1663
Changli Gaod8d1f302010-06-10 23:31:35 -07001664 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 goto out;
1666
1667 if (pmtu < IPV6_MIN_MTU) {
1668 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001669 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 * MTU (1280) and a fragment header should always be included
1671 * after a node receiving Too Big message reporting PMTU is
1672 * less than the IPv6 Minimum Link MTU.
1673 */
1674 pmtu = IPV6_MIN_MTU;
1675 allfrag = 1;
1676 }
1677
1678 /* New mtu received -> path was valid.
1679 They are sent only in response to data packets,
1680 so that this nexthop apparently is reachable. --ANK
1681 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001682 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683
1684 /* Host route. If it is static, it would be better
1685 not to override it, but add new one, so that
1686 when cache entry will expire old pmtu
1687 would return automatically.
1688 */
1689 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001690 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691 if (allfrag) {
1692 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693 features |= RTAX_FEATURE_ALLFRAG;
1694 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001696 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698 goto out;
1699 }
1700
1701 /* Network route.
1702 Two cases are possible:
1703 1. It is connected route. Action: COW
1704 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001706 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001707 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001708 else
1709 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001710
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001711 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001712 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713 if (allfrag) {
1714 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715 features |= RTAX_FEATURE_ALLFRAG;
1716 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001718
1719 /* According to RFC 1981, detecting PMTU increase shouldn't be
1720 * happened within 5 mins, the recommended timer is 10 mins.
1721 * Here this route expiration time is set to ip6_rt_mtu_expires
1722 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723 * and detecting PMTU increase will be automatically happened.
1724 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001725 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001726 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
Thomas Graf40e22e82006-08-22 00:00:45 -07001728 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001731 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732}
1733
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001735 struct net_device *dev, u32 pmtu)
1736{
1737 struct net *net = dev_net(dev);
1738
1739 /*
1740 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741 * is sending along the path" that caused the Packet Too Big message.
1742 * Since it's not possible in the general case to determine which
1743 * interface was used to send the original packet, we update the MTU
1744 * on the interface that will be used to send future packets. We also
1745 * update the MTU on the interface that received the Packet Too Big in
1746 * case the original packet was forced out that interface with
1747 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748 * correct behaviour, which would be to update the MTU on all
1749 * interfaces.
1750 */
1751 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755/*
1756 * Misc support functions
1757 */
1758
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001762 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001763 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001764 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765
1766 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001767 rt->dst.input = ort->dst.input;
1768 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001769 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001771 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001772 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001773 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001774 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 rt->rt6i_idev = ort->rt6i_idev;
1776 if (rt->rt6i_idev)
1777 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001778 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 rt->rt6i_expires = 0;
1780
1781 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783 rt->rt6i_metric = 0;
1784
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785#ifdef CONFIG_IPV6_SUBTREES
1786 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001788 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001789 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 }
1791 return rt;
1792}
1793
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001794#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001795static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001796 const struct in6_addr *prefix, int prefixlen,
1797 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001798{
1799 struct fib6_node *fn;
1800 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001801 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001802
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001803 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001804 if (table == NULL)
1805 return NULL;
1806
1807 write_lock_bh(&table->tb6_lock);
1808 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001809 if (!fn)
1810 goto out;
1811
Changli Gaod8d1f302010-06-10 23:31:35 -07001812 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001813 if (rt->rt6i_dev->ifindex != ifindex)
1814 continue;
1815 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816 continue;
1817 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001819 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001820 break;
1821 }
1822out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001823 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001824 return rt;
1825}
1826
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001827static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001828 const struct in6_addr *prefix, int prefixlen,
1829 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001830 unsigned pref)
1831{
Thomas Graf86872cb2006-08-22 00:01:08 -07001832 struct fib6_config cfg = {
1833 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001834 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001835 .fc_ifindex = ifindex,
1836 .fc_dst_len = prefixlen,
1837 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001839 .fc_nlinfo.pid = 0,
1840 .fc_nlinfo.nlh = NULL,
1841 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001842 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001843
Thomas Graf86872cb2006-08-22 00:01:08 -07001844 ipv6_addr_copy(&cfg.fc_dst, prefix);
1845 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001847 /* We should treat it as a default route if prefix length is 0. */
1848 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001850
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001853 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001854}
1855#endif
1856
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001858{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001860 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001862 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001863 if (table == NULL)
1864 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865
Thomas Grafc71099a2006-08-04 23:20:06 -07001866 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001867 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001869 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871 break;
1872 }
1873 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001874 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001875 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 return rt;
1877}
1878
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001880 struct net_device *dev,
1881 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882{
Thomas Graf86872cb2006-08-22 00:01:08 -07001883 struct fib6_config cfg = {
1884 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001885 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001886 .fc_ifindex = dev->ifindex,
1887 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001889 .fc_nlinfo.pid = 0,
1890 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001891 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001892 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893
Thomas Graf86872cb2006-08-22 00:01:08 -07001894 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895
Thomas Graf86872cb2006-08-22 00:01:08 -07001896 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001901void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902{
1903 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001904 struct fib6_table *table;
1905
1906 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001907 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001908 if (table == NULL)
1909 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910
1911restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001912 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001913 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001915 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001916 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001917 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 goto restart;
1919 }
1920 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001921 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922}
1923
Daniel Lezcano55786892008-03-04 13:47:47 -08001924static void rtmsg_to_fib6_config(struct net *net,
1925 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001926 struct fib6_config *cfg)
1927{
1928 memset(cfg, 0, sizeof(*cfg));
1929
1930 cfg->fc_table = RT6_TABLE_MAIN;
1931 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932 cfg->fc_metric = rtmsg->rtmsg_metric;
1933 cfg->fc_expires = rtmsg->rtmsg_info;
1934 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936 cfg->fc_flags = rtmsg->rtmsg_flags;
1937
Daniel Lezcano55786892008-03-04 13:47:47 -08001938 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001939
Thomas Graf86872cb2006-08-22 00:01:08 -07001940 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
Daniel Lezcano55786892008-03-04 13:47:47 -08001945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946{
Thomas Graf86872cb2006-08-22 00:01:08 -07001947 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 struct in6_rtmsg rtmsg;
1949 int err;
1950
1951 switch(cmd) {
1952 case SIOCADDRT: /* Add a route */
1953 case SIOCDELRT: /* Delete a route */
1954 if (!capable(CAP_NET_ADMIN))
1955 return -EPERM;
1956 err = copy_from_user(&rtmsg, arg,
1957 sizeof(struct in6_rtmsg));
1958 if (err)
1959 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001960
Daniel Lezcano55786892008-03-04 13:47:47 -08001961 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001962
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 rtnl_lock();
1964 switch (cmd) {
1965 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001966 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 break;
1968 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001969 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 break;
1971 default:
1972 err = -EINVAL;
1973 }
1974 rtnl_unlock();
1975
1976 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001977 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978
1979 return -EINVAL;
1980}
1981
1982/*
1983 * Drop the packet on the floor
1984 */
1985
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001988 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001989 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001990 switch (ipstats_mib_noroutes) {
1991 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001992 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001993 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001994 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001996 break;
1997 }
1998 /* FALLTHROUGH */
1999 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002000 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002002 break;
2003 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002004 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 kfree_skb(skb);
2006 return 0;
2007}
2008
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002011 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002012}
2013
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002014static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015{
Eric Dumazetadf30902009-06-02 05:19:30 +00002016 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002017 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018}
2019
David S. Miller6723ab52006-10-18 21:20:57 -07002020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002024 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
Eric Dumazetadf30902009-06-02 05:19:30 +00002029 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002030 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002031}
2032
David S. Miller6723ab52006-10-18 21:20:57 -07002033#endif
2034
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035/*
2036 * Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040 const struct in6_addr *addr,
2041 int anycast)
2042{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002043 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002044 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002045 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002046 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047
Ben Greear40385652010-11-08 12:33:48 +00002048 if (rt == NULL) {
2049 if (net_ratelimit())
2050 pr_warning("IPv6: Maximum number of routes reached,"
2051 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002053 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 in6_dev_hold(idev);
2056
David S. Miller11d53b42011-06-24 15:23:34 -07002057 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002058 rt->dst.input = ip6_input;
2059 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002061 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062
2063 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002064 if (anycast)
2065 rt->rt6i_flags |= RTF_ANYCAST;
2066 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002068 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002070 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002071
David S. Miller29546a62011-03-03 12:10:37 -08002072 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002074 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075
2076 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002078 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079
Changli Gaod8d1f302010-06-10 23:31:35 -07002080 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
2082 return rt;
2083}
2084
Daniel Walterc3968a82011-04-13 21:10:57 +00002085int ip6_route_get_saddr(struct net *net,
2086 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002087 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002088 unsigned int prefs,
2089 struct in6_addr *saddr)
2090{
2091 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092 int err = 0;
2093 if (rt->rt6i_prefsrc.plen)
2094 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095 else
2096 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097 daddr, prefs, saddr);
2098 return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace being walked */
	struct in6_addr		*addr;	/* address being removed */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115 rt != net->ipv6.ip6_null_entry &&
2116 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117 /* remove prefsrc entry */
2118 rt->rt6i_prefsrc.plen = 0;
2119 }
2120 return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125 struct net *net = dev_net(ifp->idev->dev);
2126 struct arg_dev_net_ip adni = {
2127 .dev = ifp->idev->dev,
2128 .net = net,
2129 .addr = &ifp->addr,
2130 };
2131 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace being walked */
};
2138
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002141 const struct arg_dev_net *adn = arg;
2142 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002143
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002144 if ((rt->rt6i_dev == dev || dev == NULL) &&
2145 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 RT6_TRACE("deleted by ifdown %p\n", rt);
2147 return -1;
2148 }
2149 return 0;
2150}
2151
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002152void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002154 struct arg_dev_net adn = {
2155 .dev = dev,
2156 .net = net,
2157 };
2158
2159 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002160 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161}
2162
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170{
2171 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172 struct inet6_dev *idev;
2173
2174 /* In IPv6 pmtu discovery is not optional,
2175 so that RTAX_MTU lock cannot disable it.
2176 We still use this lock to block changes
2177 caused by addrconf/ndisc.
2178 */
2179
2180 idev = __in6_dev_get(arg->dev);
2181 if (idev == NULL)
2182 return 0;
2183
2184 /* For administrative MTU increase, there is no way to discover
2185 IPv6 PMTU increase, so PMTU increase should be updated here.
2186 Since RFC 1981 doesn't include administrative MTU increase
2187 update PMTU increase is a MUST. (i.e. jumbo frame)
2188 */
2189 /*
2190 If new MTU is less than route PMTU, this new MTU will be the
2191 lowest MTU in the path, update the route PMTU to reflect PMTU
2192 decreases; if new MTU is greater than route PMTU, and the
2193 old MTU is the lowest MTU in the path, update the route PMTU
2194 to reflect the increase. In this case if the other nodes' MTU
2195 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196 PMTU discouvery.
2197 */
2198 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002199 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200 (dst_mtu(&rt->dst) >= arg->mtu ||
2201 (dst_mtu(&rt->dst) < arg->mtu &&
2202 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002203 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 return 0;
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
Thomas Grafc71099a2006-08-04 23:20:06 -07002210 struct rt6_mtu_change_arg arg = {
2211 .dev = dev,
2212 .mtu = mtu,
2213 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002215 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216}
2217
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002219 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002220 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002221 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002222 [RTA_PRIORITY] = { .type = NLA_U32 },
2223 [RTA_METRICS] = { .type = NLA_NESTED },
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228{
Thomas Graf86872cb2006-08-22 00:01:08 -07002229 struct rtmsg *rtm;
2230 struct nlattr *tb[RTA_MAX+1];
2231 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232
Thomas Graf86872cb2006-08-22 00:01:08 -07002233 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234 if (err < 0)
2235 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236
Thomas Graf86872cb2006-08-22 00:01:08 -07002237 err = -EINVAL;
2238 rtm = nlmsg_data(nlh);
2239 memset(cfg, 0, sizeof(*cfg));
2240
2241 cfg->fc_table = rtm->rtm_table;
2242 cfg->fc_dst_len = rtm->rtm_dst_len;
2243 cfg->fc_src_len = rtm->rtm_src_len;
2244 cfg->fc_flags = RTF_UP;
2245 cfg->fc_protocol = rtm->rtm_protocol;
2246
2247 if (rtm->rtm_type == RTN_UNREACHABLE)
2248 cfg->fc_flags |= RTF_REJECT;
2249
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002250 if (rtm->rtm_type == RTN_LOCAL)
2251 cfg->fc_flags |= RTF_LOCAL;
2252
Thomas Graf86872cb2006-08-22 00:01:08 -07002253 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002255 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002256
2257 if (tb[RTA_GATEWAY]) {
2258 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002261
2262 if (tb[RTA_DST]) {
2263 int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265 if (nla_len(tb[RTA_DST]) < plen)
2266 goto errout;
2267
2268 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002270
2271 if (tb[RTA_SRC]) {
2272 int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274 if (nla_len(tb[RTA_SRC]) < plen)
2275 goto errout;
2276
2277 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002279
Daniel Walterc3968a82011-04-13 21:10:57 +00002280 if (tb[RTA_PREFSRC])
2281 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
Thomas Graf86872cb2006-08-22 00:01:08 -07002283 if (tb[RTA_OIF])
2284 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286 if (tb[RTA_PRIORITY])
2287 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289 if (tb[RTA_METRICS]) {
2290 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002293
2294 if (tb[RTA_TABLE])
2295 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297 err = 0;
2298errout:
2299 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300}
2301
Thomas Grafc127ea22007-03-22 11:58:32 -07002302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303{
Thomas Graf86872cb2006-08-22 00:01:08 -07002304 struct fib6_config cfg;
2305 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306
Thomas Graf86872cb2006-08-22 00:01:08 -07002307 err = rtm_to_fib6_config(skb, nlh, &cfg);
2308 if (err < 0)
2309 return err;
2310
2311 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312}
2313
Thomas Grafc127ea22007-03-22 11:58:32 -07002314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315{
Thomas Graf86872cb2006-08-22 00:01:08 -07002316 struct fib6_config cfg;
2317 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318
Thomas Graf86872cb2006-08-22 00:01:08 -07002319 err = rtm_to_fib6_config(skb, nlh, &cfg);
2320 if (err < 0)
2321 return err;
2322
2323 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324}
2325
Thomas Graf339bf982006-11-10 14:10:15 -08002326static inline size_t rt6_nlmsg_size(void)
2327{
2328 return NLMSG_ALIGN(sizeof(struct rtmsg))
2329 + nla_total_size(16) /* RTA_SRC */
2330 + nla_total_size(16) /* RTA_DST */
2331 + nla_total_size(16) /* RTA_GATEWAY */
2332 + nla_total_size(16) /* RTA_PREFSRC */
2333 + nla_total_size(4) /* RTA_TABLE */
2334 + nla_total_size(4) /* RTA_IIF */
2335 + nla_total_size(4) /* RTA_OIF */
2336 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002337 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002338 + nla_total_size(sizeof(struct rta_cacheinfo));
2339}
2340
Brian Haley191cd582008-08-14 15:33:21 -07002341static int rt6_fill_node(struct net *net,
2342 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002343 struct in6_addr *dst, struct in6_addr *src,
2344 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002345 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346{
2347 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002348 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002349 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002350 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002351 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
2353 if (prefix) { /* user wants prefix routes only */
2354 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355 /* success since this is not a prefix route */
2356 return 1;
2357 }
2358 }
2359
Thomas Graf2d7202b2006-08-22 00:01:27 -07002360 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002362 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002363
2364 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 rtm->rtm_family = AF_INET6;
2366 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367 rtm->rtm_src_len = rt->rt6i_src.plen;
2368 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002369 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002370 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002371 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002372 table = RT6_TABLE_UNSPEC;
2373 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002374 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 if (rt->rt6i_flags&RTF_REJECT)
2376 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002377 else if (rt->rt6i_flags&RTF_LOCAL)
2378 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380 rtm->rtm_type = RTN_LOCAL;
2381 else
2382 rtm->rtm_type = RTN_UNICAST;
2383 rtm->rtm_flags = 0;
2384 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385 rtm->rtm_protocol = rt->rt6i_protocol;
2386 if (rt->rt6i_flags&RTF_DYNAMIC)
2387 rtm->rtm_protocol = RTPROT_REDIRECT;
2388 else if (rt->rt6i_flags & RTF_ADDRCONF)
2389 rtm->rtm_protocol = RTPROT_KERNEL;
2390 else if (rt->rt6i_flags&RTF_DEFAULT)
2391 rtm->rtm_protocol = RTPROT_RA;
2392
2393 if (rt->rt6i_flags&RTF_CACHE)
2394 rtm->rtm_flags |= RTM_F_CLONED;
2395
2396 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002397 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002398 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002400 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401#ifdef CONFIG_IPV6_SUBTREES
2402 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002403 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002404 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002406 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002408 if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002411 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002412 if (err <= 0) {
2413 if (!nowait) {
2414 if (err == 0)
2415 return 0;
2416 goto nla_put_failure;
2417 } else {
2418 if (err == -EMSGSIZE)
2419 goto nla_put_failure;
2420 }
2421 }
2422 } else
2423#endif
2424 NLA_PUT_U32(skb, RTA_IIF, iif);
2425 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002427 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002428 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002430
Daniel Walterc3968a82011-04-13 21:10:57 +00002431 if (rt->rt6i_prefsrc.plen) {
2432 struct in6_addr saddr_buf;
2433 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2435 }
2436
David S. Millerdefb3512010-12-08 21:16:57 -08002437 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002438 goto nla_put_failure;
2439
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002440 rcu_read_lock();
2441 n = dst_get_neighbour(&rt->dst);
2442 if (n)
2443 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002445
Changli Gaod8d1f302010-06-10 23:31:35 -07002446 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002447 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002450
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002451 if (!(rt->rt6i_flags & RTF_EXPIRES))
2452 expires = 0;
2453 else if (rt->rt6i_expires - jiffies < INT_MAX)
2454 expires = rt->rt6i_expires - jiffies;
2455 else
2456 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002457
Changli Gaod8d1f302010-06-10 23:31:35 -07002458 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002460 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461
Thomas Graf2d7202b2006-08-22 00:01:27 -07002462 return nlmsg_end(skb, nlh);
2463
2464nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002465 nlmsg_cancel(skb, nlh);
2466 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467}
2468
Patrick McHardy1b43af52006-08-10 23:11:17 -07002469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470{
2471 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472 int prefix;
2473
Thomas Graf2d7202b2006-08-22 00:01:27 -07002474 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477 } else
2478 prefix = 0;
2479
Brian Haley191cd582008-08-14 15:33:21 -07002480 return rt6_fill_node(arg->net,
2481 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002483 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484}
2485
Thomas Grafc127ea22007-03-22 11:58:32 -07002486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002488 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002489 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002491 struct sk_buff *skb;
2492 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002493 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002494 int err, iif = 0;
2495
2496 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497 if (err < 0)
2498 goto errout;
2499
2500 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002501 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002502
2503 if (tb[RTA_SRC]) {
2504 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505 goto errout;
2506
David S. Miller4c9483b2011-03-12 16:22:43 -05002507 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002508 }
2509
2510 if (tb[RTA_DST]) {
2511 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512 goto errout;
2513
David S. Miller4c9483b2011-03-12 16:22:43 -05002514 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002515 }
2516
2517 if (tb[RTA_IIF])
2518 iif = nla_get_u32(tb[RTA_IIF]);
2519
2520 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002521 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002522
2523 if (iif) {
2524 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002525 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002526 if (!dev) {
2527 err = -ENODEV;
2528 goto errout;
2529 }
2530 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531
2532 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002533 if (skb == NULL) {
2534 err = -ENOBUFS;
2535 goto errout;
2536 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537
2538 /* Reserve room for dummy headers, this skb can pass
2539 through good chunk of routing engine.
2540 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002541 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
David S. Miller4c9483b2011-03-12 16:22:43 -05002544 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002545 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546
David S. Miller4c9483b2011-03-12 16:22:43 -05002547 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002549 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002550 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002551 kfree_skb(skb);
2552 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 }
2554
Daniel Lezcano55786892008-03-04 13:47:47 -08002555 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002556errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558}
2559
Thomas Graf86872cb2006-08-22 00:01:08 -07002560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561{
2562 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002563 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002564 u32 seq;
2565 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002567 err = -ENOBUFS;
2568 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002569
Thomas Graf339bf982006-11-10 14:10:15 -08002570 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002571 if (skb == NULL)
2572 goto errout;
2573
Brian Haley191cd582008-08-14 15:33:21 -07002574 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002575 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002576 if (err < 0) {
2577 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578 WARN_ON(err == -EMSGSIZE);
2579 kfree_skb(skb);
2580 goto errout;
2581 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002582 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583 info->nlh, gfp_any());
2584 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002585errout:
2586 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002587 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588}
2589
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002590static int ip6_route_dev_notify(struct notifier_block *this,
2591 unsigned long event, void *data)
2592{
2593 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002594 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002595
2596 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002597 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002598 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002600 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002601 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002602 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002603 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
2605 }
2606
2607 return NOTIFY_OK;
2608}
2609
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610/*
2611 * /proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): none of the seq_file based /proc code visible in this
 * part of the file references this structure - confirm whether it is
 * still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002627 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002628 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002630 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631
2632#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002633 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002635 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002637 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002638 n = dst_get_neighbour(&rt->dst);
2639 if (n) {
2640 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002642 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002644 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002645 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002646 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002648 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 return 0;
2650}
2651
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002652static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002654 struct net *net = (struct net *)m->private;
2655 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002656 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657}
2658
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002661 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002662}
2663
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002664static const struct file_operations ipv6_route_proc_fops = {
2665 .owner = THIS_MODULE,
2666 .open = ipv6_route_open,
2667 .read = seq_read,
2668 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002669 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002670};
2671
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002674 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002676 net->ipv6.rt6_stats->fib_nodes,
2677 net->ipv6.rt6_stats->fib_route_nodes,
2678 net->ipv6.rt6_stats->fib_rt_alloc,
2679 net->ipv6.rt6_stats->fib_rt_entries,
2680 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002681 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002682 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002683
2684 return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002689 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002690}
2691
Arjan van de Ven9a321442007-02-12 00:55:35 -08002692static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 .owner = THIS_MODULE,
2694 .open = rt6_stats_seq_open,
2695 .read = seq_read,
2696 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002697 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698};
2699#endif /* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002705 void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002707 struct net *net;
2708 int delay;
2709 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002710 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002711
2712 net = (struct net *)ctl->extra1;
2713 delay = net->ipv6.sysctl.flush_delay;
2714 proc_dointvec(ctl, write, buffer, lenp, ppos);
2715 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717}
2718
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002719ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002720 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002722 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002724 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002725 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726 },
2727 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002729 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002730 .maxlen = sizeof(int),
2731 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002732 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733 },
2734 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002736 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737 .maxlen = sizeof(int),
2738 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002739 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740 },
2741 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002742 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002743 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744 .maxlen = sizeof(int),
2745 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002746 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 },
2748 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002750 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 .maxlen = sizeof(int),
2752 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002753 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002754 },
2755 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758 .maxlen = sizeof(int),
2759 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002760 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 },
2762 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002764 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765 .maxlen = sizeof(int),
2766 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002767 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 },
2769 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002771 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 .maxlen = sizeof(int),
2773 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002774 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775 },
2776 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002778 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002779 .maxlen = sizeof(int),
2780 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002781 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002782 },
2783 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002785 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002786 .maxlen = sizeof(int),
2787 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002788 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002790 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791};
2792
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002794{
2795 struct ctl_table *table;
2796
2797 table = kmemdup(ipv6_route_table_template,
2798 sizeof(ipv6_route_table_template),
2799 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002800
2801 if (table) {
2802 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002803 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002804 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002805 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002812 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002813 }
2814
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002815 return table;
2816}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817#endif
2818
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002819static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002820{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002821 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002823 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002825
Eric Dumazetfc66f952010-10-08 06:37:34 +00002826 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827 goto out_ip6_dst_ops;
2828
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002829 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830 sizeof(*net->ipv6.ip6_null_entry),
2831 GFP_KERNEL);
2832 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002833 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002834 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002835 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002836 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002837 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842 sizeof(*net->ipv6.ip6_prohibit_entry),
2843 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002844 if (!net->ipv6.ip6_prohibit_entry)
2845 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002846 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002847 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002848 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002849 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002851
2852 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853 sizeof(*net->ipv6.ip6_blk_hole_entry),
2854 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002855 if (!net->ipv6.ip6_blk_hole_entry)
2856 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002857 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002858 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002859 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002860 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002862#endif
2863
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002864 net->ipv6.sysctl.flush_delay = 0;
2865 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002873#ifdef CONFIG_PROC_FS
2874 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002877 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002879 ret = 0;
2880out:
2881 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002882
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885 kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887 kfree(net->ipv6.ip6_null_entry);
2888#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002889out_ip6_dst_entries:
2890 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002891out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002892 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002893}
2894
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002895static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002896{
2897#ifdef CONFIG_PROC_FS
2898 proc_net_remove(net, "ipv6_route");
2899 proc_net_remove(net, "rt6_stats");
2900#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002901 kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903 kfree(net->ipv6.ip6_prohibit_entry);
2904 kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002906 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002907}
2908
2909static struct pernet_operations ip6_route_net_ops = {
2910 .init = ip6_route_net_init,
2911 .exit = ip6_route_net_exit,
2912};
2913
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002914static struct notifier_block ip6_route_dev_notifier = {
2915 .notifier_call = ip6_route_dev_notify,
2916 .priority = 0,
2917};
2918
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002919int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002920{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002921 int ret;
2922
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002923 ret = -ENOMEM;
2924 ip6_dst_ops_template.kmem_cachep =
2925 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926 SLAB_HWCACHE_ALIGN, NULL);
2927 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002928 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002929
Eric Dumazetfc66f952010-10-08 06:37:34 +00002930 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002931 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002932 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002933
Eric Dumazetfc66f952010-10-08 06:37:34 +00002934 ret = register_pernet_subsys(&ip6_route_net_ops);
2935 if (ret)
2936 goto out_dst_entries;
2937
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002938 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002940 /* Registering of the loopback is done before this portion of code,
2941 * the loopback reference in rt6_info will not be taken, do it
2942 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002943 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002944 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002946 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002947 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002948 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002949 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002951 ret = fib6_init();
2952 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002953 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002954
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002955 ret = xfrm6_init();
2956 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002957 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002958
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002959 ret = fib6_rules_init();
2960 if (ret)
2961 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002962
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002963 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002964 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002967 goto fib6_rules_init;
2968
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002969 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002970 if (ret)
2971 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002972
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002973out:
2974 return ret;
2975
2976fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002977 fib6_rules_cleanup();
2978xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002979 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002980out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002981 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002982out_register_subsys:
2983 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002984out_dst_entries:
2985 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002986out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002987 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002988 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989}
2990
2991void ip6_route_cleanup(void)
2992{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002993 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002994 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002996 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002997 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002998 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002999 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003000}