blob: 8473016bba4a8cd6ae4dfc420c7827524fc3a84b [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() become printk() calls; below that they are no-ops.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
Eric Dumazet21efcfa2011-07-19 20:18:36 +000076static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070078static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080079static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080080static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070081static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080085static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to add / look up Route Information routes. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
101
David S. Miller06582542011-01-27 14:58:42 -0800102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
David S. Miller06582542011-01-27 14:58:42 -0800111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
David S. Millerd3aaeb32011-07-18 00:40:17 -0700135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800140static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800142 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800146 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800147 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800148 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700154 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700155 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
157
/* default_mtu callback for blackhole entries: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
162
David S. Miller14e50e52007-05-24 18:17:54 -0700163static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164{
165}
166
Held Bernhard0972ddb2011-04-24 22:07:32 +0000167static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 unsigned long old)
169{
170 return NULL;
171}
172
David S. Miller14e50e52007-05-24 18:17:54 -0700173static struct dst_ops ip6_dst_blackhole_ops = {
174 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800175 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700176 .destroy = ip6_dst_destroy,
177 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800178 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800179 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700180 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000181 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700182 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700183};
184
David S. Miller62fa8a82011-01-26 20:51:05 -0800185static const u32 ip6_template_metrics[RTAX_MAX] = {
186 [RTAX_HOPLIMIT - 1] = 255,
187};
188
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800189static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700195 .input = ip6_pkt_discard,
196 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700199 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route handled by
 * the ip6_pkt_prohibit*() callbacks, with dst error -EACCES.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose
 * input/output handlers are dst_discard, with dst error -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700242static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700243 struct net_device *dev,
244 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245{
David S. Miller957c6652011-06-24 15:25:00 -0700246 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700247
Madalin Bucurfbe58182011-09-26 07:04:56 +0000248 if (rt != NULL)
249 memset(&rt->rt6i_table, 0,
250 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700251
252 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253}
254
255static void ip6_dst_destroy(struct dst_entry *dst)
256{
257 struct rt6_info *rt = (struct rt6_info *)dst;
258 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800259 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000261 if (!(rt->dst.flags & DST_HOST))
262 dst_destroy_metrics_generic(dst);
263
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (idev != NULL) {
265 rt->rt6i_idev = NULL;
266 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900267 }
David S. Millerb3419362010-11-30 12:27:11 -0800268 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800269 rt->rt6i_peer = NULL;
270 inet_putpeer(peer);
271 }
272}
273
David S. Miller6431cbc2011-02-07 20:38:06 -0800274static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275
276static u32 rt6_peer_genid(void)
277{
278 return atomic_read(&__rt6_peer_genid);
279}
280
David S. Millerb3419362010-11-30 12:27:11 -0800281void rt6_bind_peer(struct rt6_info *rt, int create)
282{
283 struct inet_peer *peer;
284
David S. Millerb3419362010-11-30 12:27:11 -0800285 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800288 else
289 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290}
291
292static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 int how)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800297 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900298 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800300 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 struct inet6_dev *loopback_idev =
302 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 if (loopback_idev != NULL) {
304 rt->rt6i_idev = loopback_idev;
305 in6_dev_put(idev);
306 }
307 }
308}
309
310static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000312 return (rt->rt6i_flags & RTF_EXPIRES) &&
313 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000316static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700317{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000318 return ipv6_addr_type(daddr) &
319 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700320}
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700323 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
325
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800326static inline struct rt6_info *rt6_device_match(struct net *net,
327 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000328 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700330 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331{
332 struct rt6_info *local = NULL;
333 struct rt6_info *sprt;
334
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900335 if (!oif && ipv6_addr_any(saddr))
336 goto out;
337
Changli Gaod8d1f302010-06-10 23:31:35 -0700338 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900339 struct net_device *dev = sprt->rt6i_dev;
340
341 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 if (dev->ifindex == oif)
343 return sprt;
344 if (dev->flags & IFF_LOOPBACK) {
345 if (sprt->rt6i_idev == NULL ||
346 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700347 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900349 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 local->rt6i_idev->dev->ifindex == oif))
351 continue;
352 }
353 local = sprt;
354 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900355 } else {
356 if (ipv6_chk_addr(net, saddr, dev,
357 flags & RT6_LOOKUP_F_IFACE))
358 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900362 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 if (local)
364 return local;
365
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700366 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800367 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900369out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 return rt;
371}
372
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and has
 * not been updated within the interface's rtr_probe_interval, send a
 * Neighbour Solicitation to the solicited-node multicast address of the
 * neighbour's key.  A NULL @rt, or a route without a neighbour, is
 * silently ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The rate limit is enforced by stamping neigh->updated,
 * so the check and the stamp are done under the *write* side of
 * neigh->lock: with only the read side held, concurrent callers could
 * all pass the check and each send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* Became valid meanwhile, or probed too recently. */
		write_unlock_bh(&neigh->lock);
		goto out;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
412
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700416static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700419 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800420 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700421 if ((dev->flags & IFF_LOOPBACK) &&
422 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 return 1;
424 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425}
426
Dave Jonesb6f99a22007-03-22 12:27:49 -0700427static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000429 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800430 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000431
432 rcu_read_lock();
433 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700434 if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 !(rt->rt6i_flags & RTF_GATEWAY))
436 m = 1;
437 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800438 read_lock_bh(&neigh->lock);
439 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700440 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800441#ifdef CONFIG_IPV6_ROUTER_PREF
442 else if (neigh->nud_state & NUD_FAILED)
443 m = 0;
444#endif
445 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800446 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800447 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800448 } else
449 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000450 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800451 return m;
452}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800454static int rt6_score_route(struct rt6_info *rt, int oif,
455 int strict)
456{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700457 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900458
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700459 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700460 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800461 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800462#ifdef CONFIG_IPV6_ROUTER_PREF
463 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700465 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800466 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800467 return -1;
468 return m;
469}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
David S. Millerf11e6652007-03-24 20:36:25 -0700471static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473{
David S. Millerf11e6652007-03-24 20:36:25 -0700474 int m;
475
476 if (rt6_check_expired(rt))
477 goto out;
478
479 m = rt6_score_route(rt, oif, strict);
480 if (m < 0)
481 goto out;
482
483 if (m > *mpri) {
484 if (strict & RT6_LOOKUP_F_REACHABLE)
485 rt6_probe(match);
486 *mpri = m;
487 match = rt;
488 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 rt6_probe(rt);
490 }
491
492out:
493 return match;
494}
495
496static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 struct rt6_info *rr_head,
498 u32 metric, int oif, int strict)
499{
500 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800501 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
David S. Millerf11e6652007-03-24 20:36:25 -0700503 match = NULL;
504 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700505 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700506 match = find_match(rt, oif, strict, &mpri, match);
507 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700508 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700509 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800510
David S. Millerf11e6652007-03-24 20:36:25 -0700511 return match;
512}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800513
David S. Millerf11e6652007-03-24 20:36:25 -0700514static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515{
516 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800517 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
David S. Millerf11e6652007-03-24 20:36:25 -0700519 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800520 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521
David S. Millerf11e6652007-03-24 20:36:25 -0700522 rt0 = fn->rr_ptr;
523 if (!rt0)
524 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
David S. Millerf11e6652007-03-24 20:36:25 -0700526 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800528 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700529 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700530 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700531
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800532 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700533 if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 next = fn->leaf;
535
536 if (next != rt0)
537 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 }
539
David S. Millerf11e6652007-03-24 20:36:25 -0700540 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800541 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900543 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000544 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545}
546
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime, the matching route is deleted,
 * added, or has its preference and expiry refreshed.
 *
 * Returns 0 on success or -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3).
	 * Length is in units of 8 octets and includes the 8-octet fixed
	 * part, so it leaves (length - 1) * 8 octets of prefix:
	 *   prefix_len  >  64  =>  length must be 3
	 *   prefix_len  >   0  =>  length must be 2 or 3
	 *   prefix_len ==   0  =>  length must be 1, 2 or 3
	 * Anything shorter would make the prefix copy below read past the
	 * end of the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* A full 128-bit prefix is present; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Copy only prefix_len bits; the rest is zero-filled. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/*
		 * Drop the route reference; presumably it was taken by
		 * the get/add helper above - confirm.
		 */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
620
/*
 * Backtrack through the fib6 tree when the lookup produced the null
 * entry.  Expects the enclosing function to provide the variables
 * `rt` and `fn` and the labels `restart` and `out`: climbing stops with
 * `goto out` at a top-level root, and re-enters the lookup with
 * `goto restart` at the first node that carries route info.  When the
 * parent has a subtree other than the current node, that subtree is
 * searched using @saddr instead of stepping to the parent itself.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700638
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800639static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500641 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct fib6_node *fn;
644 struct rt6_info *rt;
645
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500647 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700648restart:
649 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500650 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700652out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700653 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700655 return rt;
656
657}
658
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900659struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700661{
David S. Miller4c9483b2011-03-12 16:22:43 -0500662 struct flowi6 fl6 = {
663 .flowi6_oif = oif,
664 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700665 };
666 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700667 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700668
Thomas Grafadaa70b2006-10-13 15:01:03 -0700669 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500670 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700671 flags |= RT6_LOOKUP_F_HAS_SADDR;
672 }
673
David S. Miller4c9483b2011-03-12 16:22:43 -0500674 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700675 if (dst->error == 0)
676 return (struct rt6_info *) dst;
677
678 dst_release(dst);
679
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 return NULL;
681}
682
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900683EXPORT_SYMBOL(rt6_lookup);
684
Thomas Grafc71099a2006-08-04 23:20:06 -0700685/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 It takes new route entry, the addition fails by any reason the
687 route is freed. In any case, if caller does not hold it, it may
688 be destroyed.
689 */
690
Thomas Graf86872cb2006-08-22 00:01:08 -0700691static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692{
693 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 table = rt->rt6i_table;
697 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700698 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700699 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 return err;
702}
703
Thomas Graf40e22e82006-08-22 00:00:45 -0700704int ip6_ins_rt(struct rt6_info *rt)
705{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800706 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900707 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800708 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800709 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700710}
711
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000712static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000714 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 struct rt6_info *rt;
717
718 /*
719 * Clone the route.
720 */
721
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000722 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800725 struct neighbour *neigh;
726 int attempts = !in_softirq();
727
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900728 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000730 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900731 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900733 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
737#ifdef CONFIG_IPV6_SUBTREES
738 if (rt->rt6i_src.plen && saddr) {
739 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
740 rt->rt6i_src.plen = 128;
741 }
742#endif
743
David S. Miller14deae42009-01-04 16:04:39 -0800744 retry:
745 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 if (IS_ERR(neigh)) {
747 struct net *net = dev_net(rt->rt6i_dev);
748 int saved_rt_min_interval =
749 net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 int saved_rt_elasticity =
751 net->ipv6.sysctl.ip6_rt_gc_elasticity;
752
753 if (attempts-- > 0) {
754 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000757 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800758
759 net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 saved_rt_elasticity;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 saved_rt_min_interval;
763 goto retry;
764 }
765
766 if (net_ratelimit())
767 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700768 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700769 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800770 return NULL;
771 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700772 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800774 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800776 return rt;
777}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000779static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800781{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000782 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800785 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000786 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800787 }
788 return rt;
789}
790
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800791static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500792 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793{
794 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800795 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700796 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800798 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700799 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700801 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
803relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700804 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800806restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
809restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700810 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800811
David S. Miller4c9483b2011-03-12 16:22:43 -0500812 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800813 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800814 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800815 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
Changli Gaod8d1f302010-06-10 23:31:35 -0700817 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700818 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800819
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000820 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500821 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800822 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500823 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800824 else
825 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800826
Changli Gaod8d1f302010-06-10 23:31:35 -0700827 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800828 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800829
Changli Gaod8d1f302010-06-10 23:31:35 -0700830 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800831 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700832 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800833 if (!err)
834 goto out2;
835 }
836
837 if (--attempts <= 0)
838 goto out2;
839
840 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700841 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800842 * released someone could insert this route. Relookup.
843 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700844 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800845 goto relookup;
846
847out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800848 if (reachable) {
849 reachable = 0;
850 goto restart_2;
851 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700852 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700853 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700855 rt->dst.lastuse = jiffies;
856 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700857
858 return rt;
859}
860
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800861static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500862 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700863{
David S. Miller4c9483b2011-03-12 16:22:43 -0500864 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700865}
866
Thomas Grafc71099a2006-08-04 23:20:06 -0700867void ip6_route_input(struct sk_buff *skb)
868{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000869 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900870 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700871 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500872 struct flowi6 fl6 = {
873 .flowi6_iif = skb->dev->ifindex,
874 .daddr = iph->daddr,
875 .saddr = iph->saddr,
876 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 .flowi6_mark = skb->mark,
878 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700879 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700880
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800881 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700882 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700883
David S. Miller4c9483b2011-03-12 16:22:43 -0500884 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700885}
886
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800887static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500888 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700889{
David S. Miller4c9483b2011-03-12 16:22:43 -0500890 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700891}
892
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700893struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500894 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700895{
896 int flags = 0;
897
David S. Miller4c9483b2011-03-12 16:22:43 -0500898 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700899 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700900
David S. Miller4c9483b2011-03-12 16:22:43 -0500901 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700902 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000903 else if (sk)
904 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700905
David S. Miller4c9483b2011-03-12 16:22:43 -0500906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907}
908
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900909EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
David S. Miller2774c132011-03-01 14:59:04 -0800911struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700912{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700913 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700914 struct dst_entry *new = NULL;
915
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700916 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700917 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700918 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919
Changli Gaod8d1f302010-06-10 23:31:35 -0700920 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700921
David S. Miller14e50e52007-05-24 18:17:54 -0700922 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800923 new->input = dst_discard;
924 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700925
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000926 if (dst_metrics_read_only(&ort->dst))
927 new->_metrics = ort->dst._metrics;
928 else
929 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700930 rt->rt6i_idev = ort->rt6i_idev;
931 if (rt->rt6i_idev)
932 in6_dev_hold(rt->rt6i_idev);
933 rt->rt6i_expires = 0;
934
935 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
936 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 rt->rt6i_metric = 0;
938
939 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940#ifdef CONFIG_IPV6_SUBTREES
941 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942#endif
943
944 dst_free(new);
945 }
946
David S. Miller69ead7a2011-03-01 14:45:33 -0800947 dst_release(dst_orig);
948 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700949}
David S. Miller14e50e52007-05-24 18:17:54 -0700950
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951/*
952 * Destination cache support functions
953 */
954
955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956{
957 struct rt6_info *rt;
958
959 rt = (struct rt6_info *) dst;
960
David S. Miller6431cbc2011-02-07 20:38:06 -0800961 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 if (!rt->rt6i_peer)
964 rt6_bind_peer(rt, 0);
965 rt->rt6i_peer_genid = rt6_peer_genid();
966 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800968 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 return NULL;
970}
971
972static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973{
974 struct rt6_info *rt = (struct rt6_info *) dst;
975
976 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000977 if (rt->rt6i_flags & RTF_CACHE) {
978 if (rt6_check_expired(rt)) {
979 ip6_del_rt(rt);
980 dst = NULL;
981 }
982 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000984 dst = NULL;
985 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000987 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988}
989
990static void ip6_link_failure(struct sk_buff *skb)
991{
992 struct rt6_info *rt;
993
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
Eric Dumazetadf30902009-06-02 05:19:30 +0000996 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (rt) {
998 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700999 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 rt->rt6i_flags |= RTF_EXPIRES;
1001 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 rt->rt6i_node->fn_sernum = -1;
1003 }
1004}
1005
1006static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007{
1008 struct rt6_info *rt6 = (struct rt6_info*)dst;
1009
1010 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 rt6->rt6i_flags |= RTF_MODIFIED;
1012 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001013 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001015 features |= RTAX_FEATURE_ALLFRAG;
1016 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 }
David S. Millerdefb3512010-12-08 21:16:57 -08001018 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020}
1021
David S. Miller0dbaee32010-12-13 12:52:14 -08001022static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023{
David S. Miller0dbaee32010-12-13 12:52:14 -08001024 struct net_device *dev = dst->dev;
1025 unsigned int mtu = dst_mtu(dst);
1026 struct net *net = dev_net(dev);
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029
Daniel Lezcano55786892008-03-04 13:47:47 -08001030 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001034 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 * rely only on pmtu discovery"
1038 */
1039 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 mtu = IPV6_MAXPLEN;
1041 return mtu;
1042}
1043
David S. Millerd33e4552010-12-14 13:01:14 -08001044static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045{
1046 unsigned int mtu = IPV6_MIN_MTU;
1047 struct inet6_dev *idev;
1048
1049 rcu_read_lock();
1050 idev = __in6_dev_get(dst->dev);
1051 if (idev)
1052 mtu = idev->cnf.mtu6;
1053 rcu_read_unlock();
1054
1055 return mtu;
1056}
1057
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001058static struct dst_entry *icmp6_dst_gc_list;
1059static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001060
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001061struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001063 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064{
1065 struct rt6_info *rt;
1066 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001067 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
1069 if (unlikely(idev == NULL))
1070 return NULL;
1071
David S. Miller957c6652011-06-24 15:25:00 -07001072 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 if (unlikely(rt == NULL)) {
1074 in6_dev_put(idev);
1075 goto out;
1076 }
1077
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 if (neigh)
1079 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001080 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001082 if (IS_ERR(neigh))
1083 neigh = NULL;
1084 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001086 rt->dst.flags |= DST_HOST;
1087 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001088 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001089 atomic_set(&rt->dst.__refcnt, 1);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001090 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1091 rt->rt6i_dst.plen = 128;
1092 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001093 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001095 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001096 rt->dst.next = icmp6_dst_gc_list;
1097 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001098 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Daniel Lezcano55786892008-03-04 13:47:47 -08001100 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001103 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104}
1105
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001106int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001108 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001109 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001111 spin_lock_bh(&icmp6_dst_lock);
1112 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001113
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 while ((dst = *pprev) != NULL) {
1115 if (!atomic_read(&dst->__refcnt)) {
1116 *pprev = dst->next;
1117 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 } else {
1119 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001120 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 }
1122 }
1123
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001124 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001125
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001126 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127}
1128
David S. Miller1e493d12008-09-10 17:27:15 -07001129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 void *arg)
1131{
1132 struct dst_entry *dst, **pprev;
1133
1134 spin_lock_bh(&icmp6_dst_lock);
1135 pprev = &icmp6_dst_gc_list;
1136 while ((dst = *pprev) != NULL) {
1137 struct rt6_info *rt = (struct rt6_info *) dst;
1138 if (func(rt, arg)) {
1139 *pprev = dst->next;
1140 dst_free(dst);
1141 } else {
1142 pprev = &dst->next;
1143 }
1144 }
1145 spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
Daniel Lezcano569d3642008-01-18 03:56:57 -08001148static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001151 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001152 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001157 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
Eric Dumazetfc66f952010-10-08 06:37:34 +00001159 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001160 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001161 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 goto out;
1163
Benjamin Thery6891a342008-03-04 13:49:47 -08001164 net->ipv6.ip6_rt_gc_expire++;
1165 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001167 entries = dst_entries_get_slow(ops);
1168 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001169 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001171 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001172 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173}
1174
1175/* Clean host part of a prefix. Not necessary in radix tree,
1176 but results in cleaner routing tables.
1177
1178 Remove it only when all the things will work!
1179 */
1180
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001181int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182{
David S. Miller5170ae82010-12-12 21:35:57 -08001183 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001184 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001185 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001186 struct inet6_dev *idev;
1187
1188 rcu_read_lock();
1189 idev = __in6_dev_get(dev);
1190 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001191 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001192 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001193 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001194 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 }
1196 return hoplimit;
1197}
David S. Millerabbf46a2010-12-12 21:14:46 -08001198EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199
1200/*
1201 *
1202 */
1203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205{
1206 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001207 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 struct rt6_info *rt = NULL;
1209 struct net_device *dev = NULL;
1210 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001211 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 int addr_type;
1213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 return -EINVAL;
1219#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001222 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 if (!dev)
1224 goto out;
1225 idev = in6_dev_get(dev);
1226 if (!idev)
1227 goto out;
1228 }
1229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230 if (cfg->fc_metric == 0)
1231 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232
Daniel Lezcano55786892008-03-04 13:47:47 -08001233 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001234 if (table == NULL) {
1235 err = -ENOBUFS;
1236 goto out;
1237 }
1238
David S. Miller957c6652011-06-24 15:25:00 -07001239 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
1241 if (rt == NULL) {
1242 err = -ENOMEM;
1243 goto out;
1244 }
1245
Changli Gaod8d1f302010-06-10 23:31:35 -07001246 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001247 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
Thomas Graf86872cb2006-08-22 00:01:08 -07001251 if (cfg->fc_protocol == RTPROT_UNSPEC)
1252 cfg->fc_protocol = RTPROT_BOOT;
1253 rt->rt6i_protocol = cfg->fc_protocol;
1254
1255 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001258 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001259 else if (cfg->fc_flags & RTF_LOCAL)
1260 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001262 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Changli Gaod8d1f302010-06-10 23:31:35 -07001264 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
Thomas Graf86872cb2006-08-22 00:01:08 -07001266 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001269 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001271 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273 if (!metrics) {
1274 err = -ENOMEM;
1275 goto out;
1276 }
1277 dst_init_metrics(&rt->dst, metrics, 0);
1278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282#endif
1283
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
1286 /* We cannot add true routes via loopback here,
1287 they would result in kernel looping; promote them to reject routes
1288 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001290 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001293 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 if (dev) {
1295 dev_put(dev);
1296 in6_dev_put(idev);
1297 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001298 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 dev_hold(dev);
1300 idev = in6_dev_get(dev);
1301 if (!idev) {
1302 err = -ENODEV;
1303 goto out;
1304 }
1305 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001306 rt->dst.output = ip6_pkt_discard_out;
1307 rt->dst.input = ip6_pkt_discard;
1308 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310 goto install_route;
1311 }
1312
Thomas Graf86872cb2006-08-22 00:01:08 -07001313 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001314 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 int gwa_type;
1316
Thomas Graf86872cb2006-08-22 00:01:08 -07001317 gw_addr = &cfg->fc_gateway;
1318 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 gwa_type = ipv6_addr_type(gw_addr);
1320
1321 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322 struct rt6_info *grt;
1323
1324 /* IPv6 strictly inhibits using not link-local
1325 addresses as nexthop address.
1326 Otherwise, router will not able to send redirects.
1327 It is very good, but in some (rare!) circumstances
1328 (SIT, PtP, NBMA NOARP links) it is handy to allow
1329 some exceptions. --ANK
1330 */
1331 err = -EINVAL;
1332 if (!(gwa_type&IPV6_ADDR_UNICAST))
1333 goto out;
1334
Daniel Lezcano55786892008-03-04 13:47:47 -08001335 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337 err = -EHOSTUNREACH;
1338 if (grt == NULL)
1339 goto out;
1340 if (dev) {
1341 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001342 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 goto out;
1344 }
1345 } else {
1346 dev = grt->rt6i_dev;
1347 idev = grt->rt6i_idev;
1348 dev_hold(dev);
1349 in6_dev_hold(grt->rt6i_idev);
1350 }
1351 if (!(grt->rt6i_flags&RTF_GATEWAY))
1352 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001353 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
1355 if (err)
1356 goto out;
1357 }
1358 err = -EINVAL;
1359 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360 goto out;
1361 }
1362
1363 err = -ENODEV;
1364 if (dev == NULL)
1365 goto out;
1366
Daniel Walterc3968a82011-04-13 21:10:57 +00001367 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369 err = -EINVAL;
1370 goto out;
1371 }
1372 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373 rt->rt6i_prefsrc.plen = 128;
1374 } else
1375 rt->rt6i_prefsrc.plen = 0;
1376
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001378 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379 if (IS_ERR(n)) {
1380 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 goto out;
1382 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001383 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 }
1385
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
1388install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001389 if (cfg->fc_mx) {
1390 struct nlattr *nla;
1391 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
Thomas Graf86872cb2006-08-22 00:01:08 -07001393 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001394 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001395
1396 if (type) {
1397 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 err = -EINVAL;
1399 goto out;
1400 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001401
David S. Millerdefb3512010-12-08 21:16:57 -08001402 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 }
1405 }
1406
Changli Gaod8d1f302010-06-10 23:31:35 -07001407 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001409 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001410
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001411 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001412
Thomas Graf86872cb2006-08-22 00:01:08 -07001413 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415out:
1416 if (dev)
1417 dev_put(dev);
1418 if (idev)
1419 in6_dev_put(idev);
1420 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001421 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 return err;
1423}
1424
Thomas Graf86872cb2006-08-22 00:01:08 -07001425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426{
1427 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001428 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001429 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001431 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001432 return -ENOENT;
1433
Thomas Grafc71099a2006-08-04 23:20:06 -07001434 table = rt->rt6i_table;
1435 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
Thomas Graf86872cb2006-08-22 00:01:08 -07001437 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001438 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
Thomas Grafc71099a2006-08-04 23:20:06 -07001440 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
1442 return err;
1443}
1444
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001445int ip6_del_rt(struct rt6_info *rt)
1446{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001447 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001448 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001449 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001450 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001451}
1452
Thomas Graf86872cb2006-08-22 00:01:08 -07001453static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454{
Thomas Grafc71099a2006-08-04 23:20:06 -07001455 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 struct fib6_node *fn;
1457 struct rt6_info *rt;
1458 int err = -ESRCH;
1459
Daniel Lezcano55786892008-03-04 13:47:47 -08001460 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001461 if (table == NULL)
1462 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463
Thomas Grafc71099a2006-08-04 23:20:06 -07001464 read_lock_bh(&table->tb6_lock);
1465
1466 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001467 &cfg->fc_dst, cfg->fc_dst_len,
1468 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001469
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001471 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001472 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001474 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001476 if (cfg->fc_flags & RTF_GATEWAY &&
1477 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001479 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001481 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001482 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483
Thomas Graf86872cb2006-08-22 00:01:08 -07001484 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 }
1486 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001487 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488
1489 return err;
1490}
1491
1492/*
1493 * Handle redirects
1494 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001495struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001496 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001497 struct in6_addr gateway;
1498};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001502 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001503 int flags)
1504{
David S. Miller4c9483b2011-03-12 16:22:43 -05001505 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001506 struct rt6_info *rt;
1507 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001508
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001510 * Get the "current" route for this destination and
1511 * check if the redirect has come from approriate router.
1512 *
1513 * RFC 2461 specifies that redirects should only be
1514 * accepted if they come from the nexthop to the target.
1515 * Due to the way the routes are chosen, this notion
1516 * is a bit fuzzy and one might need to check all possible
1517 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
Thomas Grafc71099a2006-08-04 23:20:06 -07001520 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001521 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001522restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001523 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001524 /*
1525 * Current route is on-link; redirect is always invalid.
1526 *
1527 * Seems, previous statement is not true. It could
1528 * be node, which looks for us as on-link (f.e. proxy ndisc)
1529 * But then router serving it might decide, that we should
1530 * know truth 8)8) --ANK (980726).
1531 */
1532 if (rt6_check_expired(rt))
1533 continue;
1534 if (!(rt->rt6i_flags & RTF_GATEWAY))
1535 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001536 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001537 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001538 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001539 continue;
1540 break;
1541 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001542
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001543 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001544 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001545 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001546out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001547 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001548
1549 read_unlock_bh(&table->tb6_lock);
1550
1551 return rt;
1552};
1553
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555 const struct in6_addr *src,
1556 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001557 struct net_device *dev)
1558{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001559 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001560 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001561 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001562 .fl6 = {
1563 .flowi6_oif = dev->ifindex,
1564 .daddr = *dest,
1565 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001566 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001567 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001568
Brian Haley86c36ce2009-10-07 13:58:01 -07001569 ipv6_addr_copy(&rdfl.gateway, gateway);
1570
Thomas Grafadaa70b2006-10-13 15:01:03 -07001571 if (rt6_need_strict(dest))
1572 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001573
David S. Miller4c9483b2011-03-12 16:22:43 -05001574 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001575 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001576}
1577
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001580 struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582 struct rt6_info *rt, *nrt = NULL;
1583 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001584 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001585
1586 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001588 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 if (net_ratelimit())
1590 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001592 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 }
1594
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 /*
1596 * We have finally decided to accept it.
1597 */
1598
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001599 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601 NEIGH_UPDATE_F_OVERRIDE|
1602 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603 NEIGH_UPDATE_F_ISROUTER))
1604 );
1605
1606 /*
1607 * Redirect received -> path was valid.
1608 * Look, redirects are sent only in response to data packets,
1609 * so that this nexthop apparently is reachable. --ANK
1610 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001611 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001614 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 goto out;
1616
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001617 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 if (nrt == NULL)
1619 goto out;
1620
1621 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622 if (on_link)
1623 nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001626 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627
Thomas Graf40e22e82006-08-22 00:00:45 -07001628 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 goto out;
1630
Changli Gaod8d1f302010-06-10 23:31:35 -07001631 netevent.old = &rt->dst;
1632 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001633 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001636 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 return;
1638 }
1639
1640out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001641 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642}
1643
1644/*
1645 * Handle ICMP "packet too big" messages
1646 * i.e. Path MTU discovery
1647 */
1648
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001650 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651{
1652 struct rt6_info *rt, *nrt;
1653 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001654again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001655 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 if (rt == NULL)
1657 return;
1658
Andrey Vagind3052b52010-12-11 15:20:11 +00001659 if (rt6_check_expired(rt)) {
1660 ip6_del_rt(rt);
1661 goto again;
1662 }
1663
Changli Gaod8d1f302010-06-10 23:31:35 -07001664 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 goto out;
1666
1667 if (pmtu < IPV6_MIN_MTU) {
1668 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001669 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 * MTU (1280) and a fragment header should always be included
1671 * after a node receiving Too Big message reporting PMTU is
1672 * less than the IPv6 Minimum Link MTU.
1673 */
1674 pmtu = IPV6_MIN_MTU;
1675 allfrag = 1;
1676 }
1677
1678 /* New mtu received -> path was valid.
1679 They are sent only in response to data packets,
1680 so that this nexthop apparently is reachable. --ANK
1681 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001682 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683
1684 /* Host route. If it is static, it would be better
1685 not to override it, but add new one, so that
1686 when cache entry will expire old pmtu
1687 would return automatically.
1688 */
1689 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001690 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691 if (allfrag) {
1692 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693 features |= RTAX_FEATURE_ALLFRAG;
1694 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001696 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698 goto out;
1699 }
1700
1701 /* Network route.
1702 Two cases are possible:
1703 1. It is connected route. Action: COW
1704 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001706 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001707 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001708 else
1709 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001710
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001711 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001712 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713 if (allfrag) {
1714 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715 features |= RTAX_FEATURE_ALLFRAG;
1716 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001718
1719 /* According to RFC 1981, detecting PMTU increase shouldn't be
1720 * happened within 5 mins, the recommended timer is 10 mins.
1721 * Here this route expiration time is set to ip6_rt_mtu_expires
1722 * which is 10 mins. After 10 mins the decreased pmtu is expired
1723 * and detecting PMTU increase will be automatically happened.
1724 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001725 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001726 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
Thomas Graf40e22e82006-08-22 00:00:45 -07001728 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001731 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732}
1733
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001735 struct net_device *dev, u32 pmtu)
1736{
1737 struct net *net = dev_net(dev);
1738
1739 /*
1740 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741 * is sending along the path" that caused the Packet Too Big message.
1742 * Since it's not possible in the general case to determine which
1743 * interface was used to send the original packet, we update the MTU
1744 * on the interface that will be used to send future packets. We also
1745 * update the MTU on the interface that received the Packet Too Big in
1746 * case the original packet was forced out that interface with
1747 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748 * correct behaviour, which would be to update the MTU on all
1749 * interfaces.
1750 */
1751 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755/*
1756 * Misc support functions
1757 */
1758
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001762 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001763 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001764 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765
1766 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001767 rt->dst.input = ort->dst.input;
1768 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001769 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001771 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001772 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001773 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001774 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 rt->rt6i_idev = ort->rt6i_idev;
1776 if (rt->rt6i_idev)
1777 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001778 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 rt->rt6i_expires = 0;
1780
1781 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783 rt->rt6i_metric = 0;
1784
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785#ifdef CONFIG_IPV6_SUBTREES
1786 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001788 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001789 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 }
1791 return rt;
1792}
1793
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001794#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001795static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001796 const struct in6_addr *prefix, int prefixlen,
1797 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001798{
1799 struct fib6_node *fn;
1800 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001801 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001802
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001803 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001804 if (table == NULL)
1805 return NULL;
1806
1807 write_lock_bh(&table->tb6_lock);
1808 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001809 if (!fn)
1810 goto out;
1811
Changli Gaod8d1f302010-06-10 23:31:35 -07001812 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001813 if (rt->rt6i_dev->ifindex != ifindex)
1814 continue;
1815 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816 continue;
1817 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001819 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001820 break;
1821 }
1822out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001823 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001824 return rt;
1825}
1826
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001827static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001828 const struct in6_addr *prefix, int prefixlen,
1829 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001830 unsigned pref)
1831{
Thomas Graf86872cb2006-08-22 00:01:08 -07001832 struct fib6_config cfg = {
1833 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001834 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001835 .fc_ifindex = ifindex,
1836 .fc_dst_len = prefixlen,
1837 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001839 .fc_nlinfo.pid = 0,
1840 .fc_nlinfo.nlh = NULL,
1841 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001842 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001843
Thomas Graf86872cb2006-08-22 00:01:08 -07001844 ipv6_addr_copy(&cfg.fc_dst, prefix);
1845 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001847 /* We should treat it as a default route if prefix length is 0. */
1848 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001850
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001853 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001854}
1855#endif
1856
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001858{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001860 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001862 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001863 if (table == NULL)
1864 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865
Thomas Grafc71099a2006-08-04 23:20:06 -07001866 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001867 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001869 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871 break;
1872 }
1873 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001874 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001875 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 return rt;
1877}
1878
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001880 struct net_device *dev,
1881 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882{
Thomas Graf86872cb2006-08-22 00:01:08 -07001883 struct fib6_config cfg = {
1884 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001885 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001886 .fc_ifindex = dev->ifindex,
1887 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001889 .fc_nlinfo.pid = 0,
1890 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001891 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001892 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893
Thomas Graf86872cb2006-08-22 00:01:08 -07001894 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895
Thomas Graf86872cb2006-08-22 00:01:08 -07001896 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 return rt6_get_dflt_router(gwaddr, dev);
1899}
1900
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001901void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902{
1903 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001904 struct fib6_table *table;
1905
1906 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001907 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001908 if (table == NULL)
1909 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910
1911restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001912 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001913 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001915 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001916 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001917 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 goto restart;
1919 }
1920 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001921 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922}
1923
Daniel Lezcano55786892008-03-04 13:47:47 -08001924static void rtmsg_to_fib6_config(struct net *net,
1925 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001926 struct fib6_config *cfg)
1927{
1928 memset(cfg, 0, sizeof(*cfg));
1929
1930 cfg->fc_table = RT6_TABLE_MAIN;
1931 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932 cfg->fc_metric = rtmsg->rtmsg_metric;
1933 cfg->fc_expires = rtmsg->rtmsg_info;
1934 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936 cfg->fc_flags = rtmsg->rtmsg_flags;
1937
Daniel Lezcano55786892008-03-04 13:47:47 -08001938 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001939
Thomas Graf86872cb2006-08-22 00:01:08 -07001940 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
Daniel Lezcano55786892008-03-04 13:47:47 -08001945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946{
Thomas Graf86872cb2006-08-22 00:01:08 -07001947 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 struct in6_rtmsg rtmsg;
1949 int err;
1950
1951 switch(cmd) {
1952 case SIOCADDRT: /* Add a route */
1953 case SIOCDELRT: /* Delete a route */
1954 if (!capable(CAP_NET_ADMIN))
1955 return -EPERM;
1956 err = copy_from_user(&rtmsg, arg,
1957 sizeof(struct in6_rtmsg));
1958 if (err)
1959 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001960
Daniel Lezcano55786892008-03-04 13:47:47 -08001961 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001962
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 rtnl_lock();
1964 switch (cmd) {
1965 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001966 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 break;
1968 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001969 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 break;
1971 default:
1972 err = -EINVAL;
1973 }
1974 rtnl_unlock();
1975
1976 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001977 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978
1979 return -EINVAL;
1980}
1981
1982/*
1983 * Drop the packet on the floor
1984 */
1985
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001988 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001989 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001990 switch (ipstats_mib_noroutes) {
1991 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001992 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001993 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001994 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001996 break;
1997 }
1998 /* FALLTHROUGH */
1999 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002000 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002002 break;
2003 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002004 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 kfree_skb(skb);
2006 return 0;
2007}
2008
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002011 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002012}
2013
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002014static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015{
Eric Dumazetadf30902009-06-02 05:19:30 +00002016 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002017 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018}
2019
David S. Miller6723ab52006-10-18 21:20:57 -07002020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002024 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
Eric Dumazetadf30902009-06-02 05:19:30 +00002029 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002030 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002031}
2032
David S. Miller6723ab52006-10-18 21:20:57 -07002033#endif
2034
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035/*
2036 * Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040 const struct in6_addr *addr,
2041 int anycast)
2042{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002043 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002044 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002045 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002046 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047
Ben Greear40385652010-11-08 12:33:48 +00002048 if (rt == NULL) {
2049 if (net_ratelimit())
2050 pr_warning("IPv6: Maximum number of routes reached,"
2051 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002053 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 in6_dev_hold(idev);
2056
David S. Miller11d53b42011-06-24 15:23:34 -07002057 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002058 rt->dst.input = ip6_input;
2059 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002061 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062
2063 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002064 if (anycast)
2065 rt->rt6i_flags |= RTF_ANYCAST;
2066 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002068 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002070 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002071
David S. Miller29546a62011-03-03 12:10:37 -08002072 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002074 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075
2076 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002078 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079
Changli Gaod8d1f302010-06-10 23:31:35 -07002080 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
2082 return rt;
2083}
2084
Daniel Walterc3968a82011-04-13 21:10:57 +00002085int ip6_route_get_saddr(struct net *net,
2086 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002087 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002088 unsigned int prefs,
2089 struct in6_addr *saddr)
2090{
2091 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092 int err = 0;
2093 if (rt->rt6i_prefsrc.plen)
2094 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095 else
2096 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097 daddr, prefs, saddr);
2098 return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() by the table walker. */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against prefsrc */
};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115 rt != net->ipv6.ip6_null_entry &&
2116 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117 /* remove prefsrc entry */
2118 rt->rt6i_prefsrc.plen = 0;
2119 }
2120 return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125 struct net *net = dev_net(ifp->idev->dev);
2126 struct arg_dev_net_ip adni = {
2127 .dev = ifp->idev->dev,
2128 .net = net,
2129 .addr = &ifp->addr,
2130 };
2131 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
/* Argument bundle handed to fib6_ifdown() by the route walkers. */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2138
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002141 const struct arg_dev_net *adn = arg;
2142 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002143
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002144 if ((rt->rt6i_dev == dev || dev == NULL) &&
2145 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 RT6_TRACE("deleted by ifdown %p\n", rt);
2147 return -1;
2148 }
2149 return 0;
2150}
2151
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002152void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002154 struct arg_dev_net adn = {
2155 .dev = dev,
2156 .net = net,
2157 };
2158
2159 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002160 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161}
2162
/* Argument bundle handed to rt6_mtu_change_route() by the table walker. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170{
2171 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172 struct inet6_dev *idev;
2173
2174 /* In IPv6 pmtu discovery is not optional,
2175 so that RTAX_MTU lock cannot disable it.
2176 We still use this lock to block changes
2177 caused by addrconf/ndisc.
2178 */
2179
2180 idev = __in6_dev_get(arg->dev);
2181 if (idev == NULL)
2182 return 0;
2183
2184 /* For administrative MTU increase, there is no way to discover
2185 IPv6 PMTU increase, so PMTU increase should be updated here.
2186 Since RFC 1981 doesn't include administrative MTU increase
2187 update PMTU increase is a MUST. (i.e. jumbo frame)
2188 */
2189 /*
2190 If new MTU is less than route PMTU, this new MTU will be the
2191 lowest MTU in the path, update the route PMTU to reflect PMTU
2192 decreases; if new MTU is greater than route PMTU, and the
2193 old MTU is the lowest MTU in the path, update the route PMTU
2194 to reflect the increase. In this case if the other nodes' MTU
2195 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2196 PMTU discouvery.
2197 */
2198 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002199 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200 (dst_mtu(&rt->dst) >= arg->mtu ||
2201 (dst_mtu(&rt->dst) < arg->mtu &&
2202 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002203 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205 return 0;
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
Thomas Grafc71099a2006-08-04 23:20:06 -07002210 struct rt6_mtu_change_arg arg = {
2211 .dev = dev,
2212 .mtu = mtu,
2213 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002215 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216}
2217
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002219 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002220 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002221 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002222 [RTA_PRIORITY] = { .type = NLA_U32 },
2223 [RTA_METRICS] = { .type = NLA_NESTED },
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228{
Thomas Graf86872cb2006-08-22 00:01:08 -07002229 struct rtmsg *rtm;
2230 struct nlattr *tb[RTA_MAX+1];
2231 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232
Thomas Graf86872cb2006-08-22 00:01:08 -07002233 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234 if (err < 0)
2235 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236
Thomas Graf86872cb2006-08-22 00:01:08 -07002237 err = -EINVAL;
2238 rtm = nlmsg_data(nlh);
2239 memset(cfg, 0, sizeof(*cfg));
2240
2241 cfg->fc_table = rtm->rtm_table;
2242 cfg->fc_dst_len = rtm->rtm_dst_len;
2243 cfg->fc_src_len = rtm->rtm_src_len;
2244 cfg->fc_flags = RTF_UP;
2245 cfg->fc_protocol = rtm->rtm_protocol;
2246
2247 if (rtm->rtm_type == RTN_UNREACHABLE)
2248 cfg->fc_flags |= RTF_REJECT;
2249
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002250 if (rtm->rtm_type == RTN_LOCAL)
2251 cfg->fc_flags |= RTF_LOCAL;
2252
Thomas Graf86872cb2006-08-22 00:01:08 -07002253 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002255 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002256
2257 if (tb[RTA_GATEWAY]) {
2258 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002261
2262 if (tb[RTA_DST]) {
2263 int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265 if (nla_len(tb[RTA_DST]) < plen)
2266 goto errout;
2267
2268 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002270
2271 if (tb[RTA_SRC]) {
2272 int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274 if (nla_len(tb[RTA_SRC]) < plen)
2275 goto errout;
2276
2277 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002279
Daniel Walterc3968a82011-04-13 21:10:57 +00002280 if (tb[RTA_PREFSRC])
2281 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
Thomas Graf86872cb2006-08-22 00:01:08 -07002283 if (tb[RTA_OIF])
2284 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286 if (tb[RTA_PRIORITY])
2287 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289 if (tb[RTA_METRICS]) {
2290 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002293
2294 if (tb[RTA_TABLE])
2295 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297 err = 0;
2298errout:
2299 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300}
2301
Thomas Grafc127ea22007-03-22 11:58:32 -07002302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303{
Thomas Graf86872cb2006-08-22 00:01:08 -07002304 struct fib6_config cfg;
2305 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306
Thomas Graf86872cb2006-08-22 00:01:08 -07002307 err = rtm_to_fib6_config(skb, nlh, &cfg);
2308 if (err < 0)
2309 return err;
2310
2311 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312}
2313
Thomas Grafc127ea22007-03-22 11:58:32 -07002314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315{
Thomas Graf86872cb2006-08-22 00:01:08 -07002316 struct fib6_config cfg;
2317 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318
Thomas Graf86872cb2006-08-22 00:01:08 -07002319 err = rtm_to_fib6_config(skb, nlh, &cfg);
2320 if (err < 0)
2321 return err;
2322
2323 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324}
2325
Thomas Graf339bf982006-11-10 14:10:15 -08002326static inline size_t rt6_nlmsg_size(void)
2327{
2328 return NLMSG_ALIGN(sizeof(struct rtmsg))
2329 + nla_total_size(16) /* RTA_SRC */
2330 + nla_total_size(16) /* RTA_DST */
2331 + nla_total_size(16) /* RTA_GATEWAY */
2332 + nla_total_size(16) /* RTA_PREFSRC */
2333 + nla_total_size(4) /* RTA_TABLE */
2334 + nla_total_size(4) /* RTA_IIF */
2335 + nla_total_size(4) /* RTA_OIF */
2336 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002337 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002338 + nla_total_size(sizeof(struct rta_cacheinfo));
2339}
2340
Brian Haley191cd582008-08-14 15:33:21 -07002341static int rt6_fill_node(struct net *net,
2342 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002343 struct in6_addr *dst, struct in6_addr *src,
2344 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002345 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346{
2347 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002348 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002349 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002350 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002351 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
2353 if (prefix) { /* user wants prefix routes only */
2354 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355 /* success since this is not a prefix route */
2356 return 1;
2357 }
2358 }
2359
Thomas Graf2d7202b2006-08-22 00:01:27 -07002360 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002362 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002363
2364 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 rtm->rtm_family = AF_INET6;
2366 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367 rtm->rtm_src_len = rt->rt6i_src.plen;
2368 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002369 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002370 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002371 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002372 table = RT6_TABLE_UNSPEC;
2373 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002374 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 if (rt->rt6i_flags&RTF_REJECT)
2376 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002377 else if (rt->rt6i_flags&RTF_LOCAL)
2378 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380 rtm->rtm_type = RTN_LOCAL;
2381 else
2382 rtm->rtm_type = RTN_UNICAST;
2383 rtm->rtm_flags = 0;
2384 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385 rtm->rtm_protocol = rt->rt6i_protocol;
2386 if (rt->rt6i_flags&RTF_DYNAMIC)
2387 rtm->rtm_protocol = RTPROT_REDIRECT;
2388 else if (rt->rt6i_flags & RTF_ADDRCONF)
2389 rtm->rtm_protocol = RTPROT_KERNEL;
2390 else if (rt->rt6i_flags&RTF_DEFAULT)
2391 rtm->rtm_protocol = RTPROT_RA;
2392
2393 if (rt->rt6i_flags&RTF_CACHE)
2394 rtm->rtm_flags |= RTM_F_CLONED;
2395
2396 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002397 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002398 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002400 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401#ifdef CONFIG_IPV6_SUBTREES
2402 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002403 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002404 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002406 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002408 if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002411 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002412 if (err <= 0) {
2413 if (!nowait) {
2414 if (err == 0)
2415 return 0;
2416 goto nla_put_failure;
2417 } else {
2418 if (err == -EMSGSIZE)
2419 goto nla_put_failure;
2420 }
2421 }
2422 } else
2423#endif
2424 NLA_PUT_U32(skb, RTA_IIF, iif);
2425 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002427 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002428 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002430
Daniel Walterc3968a82011-04-13 21:10:57 +00002431 if (rt->rt6i_prefsrc.plen) {
2432 struct in6_addr saddr_buf;
2433 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2435 }
2436
David S. Millerdefb3512010-12-08 21:16:57 -08002437 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002438 goto nla_put_failure;
2439
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002440 rcu_read_lock();
2441 n = dst_get_neighbour(&rt->dst);
2442 if (n)
2443 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002445
Changli Gaod8d1f302010-06-10 23:31:35 -07002446 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002447 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002450
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002451 if (!(rt->rt6i_flags & RTF_EXPIRES))
2452 expires = 0;
2453 else if (rt->rt6i_expires - jiffies < INT_MAX)
2454 expires = rt->rt6i_expires - jiffies;
2455 else
2456 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002457
Changli Gaod8d1f302010-06-10 23:31:35 -07002458 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002460 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461
Thomas Graf2d7202b2006-08-22 00:01:27 -07002462 return nlmsg_end(skb, nlh);
2463
2464nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002465 nlmsg_cancel(skb, nlh);
2466 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467}
2468
Patrick McHardy1b43af52006-08-10 23:11:17 -07002469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470{
2471 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472 int prefix;
2473
Thomas Graf2d7202b2006-08-22 00:01:27 -07002474 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477 } else
2478 prefix = 0;
2479
Brian Haley191cd582008-08-14 15:33:21 -07002480 return rt6_fill_node(arg->net,
2481 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002483 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484}
2485
Thomas Grafc127ea22007-03-22 11:58:32 -07002486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002488 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002489 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002491 struct sk_buff *skb;
2492 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002493 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002494 int err, iif = 0;
2495
2496 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497 if (err < 0)
2498 goto errout;
2499
2500 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002501 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002502
2503 if (tb[RTA_SRC]) {
2504 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505 goto errout;
2506
David S. Miller4c9483b2011-03-12 16:22:43 -05002507 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002508 }
2509
2510 if (tb[RTA_DST]) {
2511 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512 goto errout;
2513
David S. Miller4c9483b2011-03-12 16:22:43 -05002514 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002515 }
2516
2517 if (tb[RTA_IIF])
2518 iif = nla_get_u32(tb[RTA_IIF]);
2519
2520 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002521 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002522
2523 if (iif) {
2524 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002525 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002526 if (!dev) {
2527 err = -ENODEV;
2528 goto errout;
2529 }
2530 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531
2532 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002533 if (skb == NULL) {
2534 err = -ENOBUFS;
2535 goto errout;
2536 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537
2538 /* Reserve room for dummy headers, this skb can pass
2539 through good chunk of routing engine.
2540 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002541 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
David S. Miller4c9483b2011-03-12 16:22:43 -05002544 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002545 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546
David S. Miller4c9483b2011-03-12 16:22:43 -05002547 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002549 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002550 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002551 kfree_skb(skb);
2552 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 }
2554
Daniel Lezcano55786892008-03-04 13:47:47 -08002555 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002556errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558}
2559
Thomas Graf86872cb2006-08-22 00:01:08 -07002560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561{
2562 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002563 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002564 u32 seq;
2565 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002567 err = -ENOBUFS;
2568 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002569
Thomas Graf339bf982006-11-10 14:10:15 -08002570 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002571 if (skb == NULL)
2572 goto errout;
2573
Brian Haley191cd582008-08-14 15:33:21 -07002574 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002575 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002576 if (err < 0) {
2577 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578 WARN_ON(err == -EMSGSIZE);
2579 kfree_skb(skb);
2580 goto errout;
2581 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002582 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583 info->nlh, gfp_any());
2584 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002585errout:
2586 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002587 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588}
2589
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002590static int ip6_route_dev_notify(struct notifier_block *this,
2591 unsigned long event, void *data)
2592{
2593 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002594 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002595
2596 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002597 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002598 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002600 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002601 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002602 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002603 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
2605 }
2606
2607 return NOTIFY_OK;
2608}
2609
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610/*
2611 * /proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; the handlers below write through seq_file instead.
 * Confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002627 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002628 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002630 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631
2632#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002633 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002635 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002637 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002638 n = dst_get_neighbour(&rt->dst);
2639 if (n) {
2640 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002642 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002644 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002645 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002646 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002648 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 return 0;
2650}
2651
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002652static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002654 struct net *net = (struct net *)m->private;
2655 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002656 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657}
2658
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002661 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002662}
2663
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002664static const struct file_operations ipv6_route_proc_fops = {
2665 .owner = THIS_MODULE,
2666 .open = ipv6_route_open,
2667 .read = seq_read,
2668 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002669 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002670};
2671
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002674 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002676 net->ipv6.rt6_stats->fib_nodes,
2677 net->ipv6.rt6_stats->fib_route_nodes,
2678 net->ipv6.rt6_stats->fib_rt_alloc,
2679 net->ipv6.rt6_stats->fib_rt_entries,
2680 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002681 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002682 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002683
2684 return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002689 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002690}
2691
Arjan van de Ven9a321442007-02-12 00:55:35 -08002692static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 .owner = THIS_MODULE,
2694 .open = rt6_stats_seq_open,
2695 .read = seq_read,
2696 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002697 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698};
2699#endif /* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002705 void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002707 struct net *net;
2708 int delay;
2709 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002710 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002711
2712 net = (struct net *)ctl->extra1;
2713 delay = net->ipv6.sysctl.flush_delay;
2714 proc_dointvec(ctl, write, buffer, lenp, ppos);
2715 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717}
2718
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002719ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002720 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002722 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002724 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002725 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726 },
2727 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002729 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002730 .maxlen = sizeof(int),
2731 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002732 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733 },
2734 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002736 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737 .maxlen = sizeof(int),
2738 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002739 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740 },
2741 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002742 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002743 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744 .maxlen = sizeof(int),
2745 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002746 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 },
2748 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002750 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 .maxlen = sizeof(int),
2752 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002753 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002754 },
2755 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758 .maxlen = sizeof(int),
2759 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002760 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 },
2762 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002764 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765 .maxlen = sizeof(int),
2766 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002767 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 },
2769 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002771 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 .maxlen = sizeof(int),
2773 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002774 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775 },
2776 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002778 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002779 .maxlen = sizeof(int),
2780 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002781 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002782 },
2783 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002785 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002786 .maxlen = sizeof(int),
2787 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002788 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002790 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791};
2792
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002794{
2795 struct ctl_table *table;
2796
2797 table = kmemdup(ipv6_route_table_template,
2798 sizeof(ipv6_route_table_template),
2799 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002800
2801 if (table) {
2802 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002803 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002804 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002805 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002812 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002813 }
2814
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002815 return table;
2816}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817#endif
2818
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002819static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002820{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002821 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002822
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002823 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002825
Eric Dumazetfc66f952010-10-08 06:37:34 +00002826 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827 goto out_ip6_dst_ops;
2828
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002829 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830 sizeof(*net->ipv6.ip6_null_entry),
2831 GFP_KERNEL);
2832 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002833 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002834 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002835 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002836 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002837 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842 sizeof(*net->ipv6.ip6_prohibit_entry),
2843 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002844 if (!net->ipv6.ip6_prohibit_entry)
2845 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002846 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002847 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002848 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002849 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002851
2852 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853 sizeof(*net->ipv6.ip6_blk_hole_entry),
2854 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002855 if (!net->ipv6.ip6_blk_hole_entry)
2856 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002857 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002858 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002859 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002860 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002862#endif
2863
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002864 net->ipv6.sysctl.flush_delay = 0;
2865 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002873#ifdef CONFIG_PROC_FS
2874 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002877 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002879 ret = 0;
2880out:
2881 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002882
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885 kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887 kfree(net->ipv6.ip6_null_entry);
2888#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002889out_ip6_dst_entries:
2890 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002891out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002892 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002893}
2894
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002895static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002896{
2897#ifdef CONFIG_PROC_FS
2898 proc_net_remove(net, "ipv6_route");
2899 proc_net_remove(net, "rt6_stats");
2900#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002901 kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903 kfree(net->ipv6.ip6_prohibit_entry);
2904 kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002906 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002907}
2908
2909static struct pernet_operations ip6_route_net_ops = {
2910 .init = ip6_route_net_init,
2911 .exit = ip6_route_net_exit,
2912};
2913
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002914static struct notifier_block ip6_route_dev_notifier = {
2915 .notifier_call = ip6_route_dev_notify,
2916 .priority = 0,
2917};
2918
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002919int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002920{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002921 int ret;
2922
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002923 ret = -ENOMEM;
2924 ip6_dst_ops_template.kmem_cachep =
2925 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926 SLAB_HWCACHE_ALIGN, NULL);
2927 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002928 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002929
Eric Dumazetfc66f952010-10-08 06:37:34 +00002930 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002931 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002932 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002933
Eric Dumazetfc66f952010-10-08 06:37:34 +00002934 ret = register_pernet_subsys(&ip6_route_net_ops);
2935 if (ret)
2936 goto out_dst_entries;
2937
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002938 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002940 /* Registering of the loopback is done before this portion of code,
2941 * the loopback reference in rt6_info will not be taken, do it
2942 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002943 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002944 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002946 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002947 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002948 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002949 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002951 ret = fib6_init();
2952 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002953 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002954
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002955 ret = xfrm6_init();
2956 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002957 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002958
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002959 ret = fib6_rules_init();
2960 if (ret)
2961 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002962
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002963 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002964 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002967 goto fib6_rules_init;
2968
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002969 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002970 if (ret)
2971 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002972
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002973out:
2974 return ret;
2975
2976fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002977 fib6_rules_cleanup();
2978xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002979 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002980out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002981 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002982out_register_subsys:
2983 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002984out_dst_entries:
2985 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002986out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002987 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002988 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989}
2990
2991void ip6_route_cleanup(void)
2992{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002993 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002994 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002996 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002997 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002998 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002999 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003000}