blob: 2897403fdaff027b0e974df0b4c8643c3bbbdede [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Compile-time trace level.  Set to 3 to get tracing: RDBG() and
 * RT6_TRACE() then expand to printk() calls.  At any lower level both
 * macros expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
Eric Dumazet21efcfa2011-07-19 20:18:36 +000076static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070078static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080079static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080080static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070081static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080085static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to look up / install Route Information routes. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
101
David S. Miller06582542011-01-27 14:58:42 -0800102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
David S. Miller06582542011-01-27 14:58:42 -0800111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
David S. Millerd3aaeb32011-07-18 00:40:17 -0700135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800140static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800142 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800146 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800147 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800148 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700154 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700155 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
157
/* dst_ops->default_mtu hook for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
162
David S. Miller14e50e52007-05-24 18:17:54 -0700163static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164{
165}
166
Held Bernhard0972ddb2011-04-24 22:07:32 +0000167static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 unsigned long old)
169{
170 return NULL;
171}
172
David S. Miller14e50e52007-05-24 18:17:54 -0700173static struct dst_ops ip6_dst_blackhole_ops = {
174 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800175 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700176 .destroy = ip6_dst_destroy,
177 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800178 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800179 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700180 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000181 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700182 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700183};
184
David S. Miller62fa8a82011-01-26 20:51:05 -0800185static const u32 ip6_template_metrics[RTAX_MAX] = {
186 [RTAX_HOPLIMIT - 1] = 255,
187};
188
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800189static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700195 .input = ip6_pkt_discard,
196 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700199 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry with error -EACCES
 * handled by the ip6_pkt_prohibit*() functions.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= -1,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry with error -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= -1,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700242static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700243 struct net_device *dev,
244 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245{
David S. Miller957c6652011-06-24 15:25:00 -0700246 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700247
Madalin Bucurfbe58182011-09-26 07:04:56 +0000248 if (rt != NULL)
249 memset(&rt->rt6i_table, 0,
250 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700251
252 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253}
254
255static void ip6_dst_destroy(struct dst_entry *dst)
256{
257 struct rt6_info *rt = (struct rt6_info *)dst;
258 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800259 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000261 if (!(rt->dst.flags & DST_HOST))
262 dst_destroy_metrics_generic(dst);
263
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (idev != NULL) {
265 rt->rt6i_idev = NULL;
266 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900267 }
David S. Millerb3419362010-11-30 12:27:11 -0800268 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800269 rt->rt6i_peer = NULL;
270 inet_putpeer(peer);
271 }
272}
273
David S. Miller6431cbc2011-02-07 20:38:06 -0800274static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275
276static u32 rt6_peer_genid(void)
277{
278 return atomic_read(&__rt6_peer_genid);
279}
280
David S. Millerb3419362010-11-30 12:27:11 -0800281void rt6_bind_peer(struct rt6_info *rt, int create)
282{
283 struct inet_peer *peer;
284
David S. Millerb3419362010-11-30 12:27:11 -0800285 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800288 else
289 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290}
291
292static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 int how)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800297 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900298 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800300 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 struct inet6_dev *loopback_idev =
302 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 if (loopback_idev != NULL) {
304 rt->rt6i_idev = loopback_idev;
305 in6_dev_put(idev);
306 }
307 }
308}
309
310static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000312 return (rt->rt6i_flags & RTF_EXPIRES) &&
313 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000316static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700317{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000318 return ipv6_addr_type(daddr) &
319 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700320}
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700323 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
325
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800326static inline struct rt6_info *rt6_device_match(struct net *net,
327 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000328 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700330 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331{
332 struct rt6_info *local = NULL;
333 struct rt6_info *sprt;
334
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900335 if (!oif && ipv6_addr_any(saddr))
336 goto out;
337
Changli Gaod8d1f302010-06-10 23:31:35 -0700338 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900339 struct net_device *dev = sprt->rt6i_dev;
340
341 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 if (dev->ifindex == oif)
343 return sprt;
344 if (dev->flags & IFF_LOOPBACK) {
345 if (sprt->rt6i_idev == NULL ||
346 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700347 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900349 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 local->rt6i_idev->dev->ifindex == oif))
351 continue;
352 }
353 local = sprt;
354 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900355 } else {
356 if (ipv6_chk_addr(net, saddr, dev,
357 flags & RT6_LOOKUP_F_IFACE))
358 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900362 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 if (local)
364 return local;
365
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700366 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800367 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900369out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 return rt;
371}
372
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour entry behind @rt is not in a NUD_VALID state, and at
 * least rtr_probe_interval jiffies have passed since neigh->updated, send
 * a Neighbour Solicitation for it to its solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit is implemented by stamping neigh->updated, so
 * the check-and-stamp is done under the neighbour *write* lock: a read
 * lock would let concurrent callers all pass the time check and each
 * send a probe, and would also write a field other readers are reading.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int send_probe = 0;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	/* re-check the state now that we hold the lock */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	/* transmit outside the neighbour lock */
	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
412
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700416static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700419 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800420 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700421 if ((dev->flags & IFF_LOOPBACK) &&
422 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 return 1;
424 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425}
426
Dave Jonesb6f99a22007-03-22 12:27:49 -0700427static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000429 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800430 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000431
432 rcu_read_lock();
433 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700434 if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 !(rt->rt6i_flags & RTF_GATEWAY))
436 m = 1;
437 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800438 read_lock_bh(&neigh->lock);
439 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700440 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800441#ifdef CONFIG_IPV6_ROUTER_PREF
442 else if (neigh->nud_state & NUD_FAILED)
443 m = 0;
444#endif
445 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800446 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800447 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800448 } else
449 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000450 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800451 return m;
452}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800454static int rt6_score_route(struct rt6_info *rt, int oif,
455 int strict)
456{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700457 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900458
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700459 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700460 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800461 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800462#ifdef CONFIG_IPV6_ROUTER_PREF
463 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700465 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800466 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800467 return -1;
468 return m;
469}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
David S. Millerf11e6652007-03-24 20:36:25 -0700471static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473{
David S. Millerf11e6652007-03-24 20:36:25 -0700474 int m;
475
476 if (rt6_check_expired(rt))
477 goto out;
478
479 m = rt6_score_route(rt, oif, strict);
480 if (m < 0)
481 goto out;
482
483 if (m > *mpri) {
484 if (strict & RT6_LOOKUP_F_REACHABLE)
485 rt6_probe(match);
486 *mpri = m;
487 match = rt;
488 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 rt6_probe(rt);
490 }
491
492out:
493 return match;
494}
495
496static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 struct rt6_info *rr_head,
498 u32 metric, int oif, int strict)
499{
500 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800501 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
David S. Millerf11e6652007-03-24 20:36:25 -0700503 match = NULL;
504 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700505 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700506 match = find_match(rt, oif, strict, &mpri, match);
507 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700508 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700509 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800510
David S. Millerf11e6652007-03-24 20:36:25 -0700511 return match;
512}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800513
David S. Millerf11e6652007-03-24 20:36:25 -0700514static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515{
516 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800517 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
David S. Millerf11e6652007-03-24 20:36:25 -0700519 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800520 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521
David S. Millerf11e6652007-03-24 20:36:25 -0700522 rt0 = fn->rr_ptr;
523 if (!rt0)
524 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
David S. Millerf11e6652007-03-24 20:36:25 -0700526 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800528 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700529 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700530 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700531
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800532 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700533 if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 next = fn->leaf;
535
536 if (next != rt0)
537 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 }
539
David S. Millerf11e6652007-03-24 20:36:25 -0700540 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800541 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900543 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000544 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545}
546
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option (RFC 4191) from a Router
 * Advertisement.
 *
 * @dev:    device the advertisement arrived on
 * @opt:    start of the option (struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime deletes an existing route for the prefix.  Otherwise a
 * route is added, or an existing one has its preference refreshed, and
 * its expiry is set from the lifetime (cleared for an infinite lifetime).
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* a negative len must not sneak past the unsigned comparison */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3).
	 *
	 * Length is in units of 8 octets and includes the 8-octet fixed
	 * part, so it carries (length - 1) * 8 octets of prefix:
	 *   prefix_len >  64  needs length == 3 (16 prefix octets)
	 *   prefix_len >   0  needs length >= 2 ( 8 prefix octets)
	 * Anything shorter would make the prefix copy below read beyond
	 * the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	/* the advertised option must fit into what we were handed */
	if (len < rinfo->length * 8)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copies only ceil(prefix_len / 8) octets, validated above */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* lifetime 0: withdraw the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
620
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on the null entry.
 *
 * Not hygienic: it uses the caller's local variables `rt` and `fn` and
 * jumps to the caller's labels `out` and `restart`.
 *
 * While `rt` is the namespace null entry, step to the parent node, or
 * into the parent's source subtree (looked up by @saddr) when there is
 * one and we did not just come from it.  Jump to `restart` at the first
 * node flagged RTN_RTINFO, or to `out` on reaching the top-level root.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700638
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800639static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500641 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct fib6_node *fn;
644 struct rt6_info *rt;
645
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500647 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700648restart:
649 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500650 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700652out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700653 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700655 return rt;
656
657}
658
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900659struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700661{
David S. Miller4c9483b2011-03-12 16:22:43 -0500662 struct flowi6 fl6 = {
663 .flowi6_oif = oif,
664 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700665 };
666 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700667 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700668
Thomas Grafadaa70b2006-10-13 15:01:03 -0700669 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500670 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700671 flags |= RT6_LOOKUP_F_HAS_SADDR;
672 }
673
David S. Miller4c9483b2011-03-12 16:22:43 -0500674 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700675 if (dst->error == 0)
676 return (struct rt6_info *) dst;
677
678 dst_release(dst);
679
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 return NULL;
681}
682
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900683EXPORT_SYMBOL(rt6_lookup);
684
Thomas Grafc71099a2006-08-04 23:20:06 -0700685/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 It takes new route entry, the addition fails by any reason the
687 route is freed. In any case, if caller does not hold it, it may
688 be destroyed.
689 */
690
Thomas Graf86872cb2006-08-22 00:01:08 -0700691static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692{
693 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 table = rt->rt6i_table;
697 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700698 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700699 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 return err;
702}
703
Thomas Graf40e22e82006-08-22 00:00:45 -0700704int ip6_ins_rt(struct rt6_info *rt)
705{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800706 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900707 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800708 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800709 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700710}
711
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000712static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000714 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 struct rt6_info *rt;
717
718 /*
719 * Clone the route.
720 */
721
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000722 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800725 struct neighbour *neigh;
726 int attempts = !in_softirq();
727
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900728 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000730 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900731 rt->rt6i_flags |= RTF_ANYCAST;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000732 rt->rt6i_gateway = *daddr;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900733 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
737#ifdef CONFIG_IPV6_SUBTREES
738 if (rt->rt6i_src.plen && saddr) {
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000739 rt->rt6i_src.addr = *saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 rt->rt6i_src.plen = 128;
741 }
742#endif
743
David S. Miller14deae42009-01-04 16:04:39 -0800744 retry:
745 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 if (IS_ERR(neigh)) {
747 struct net *net = dev_net(rt->rt6i_dev);
748 int saved_rt_min_interval =
749 net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 int saved_rt_elasticity =
751 net->ipv6.sysctl.ip6_rt_gc_elasticity;
752
753 if (attempts-- > 0) {
754 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000757 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800758
759 net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 saved_rt_elasticity;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 saved_rt_min_interval;
763 goto retry;
764 }
765
766 if (net_ratelimit())
767 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700768 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700769 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800770 return NULL;
771 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700772 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800774 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800776 return rt;
777}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000779static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800781{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000782 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800785 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000786 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800787 }
788 return rt;
789}
790
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800791static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500792 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793{
794 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800795 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700796 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800798 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700799 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700801 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
803relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700804 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800806restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
809restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700810 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800811
David S. Miller4c9483b2011-03-12 16:22:43 -0500812 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800813 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800814 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800815 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
Changli Gaod8d1f302010-06-10 23:31:35 -0700817 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700818 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800819
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000820 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500821 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800822 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500823 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800824 else
825 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800826
Changli Gaod8d1f302010-06-10 23:31:35 -0700827 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800828 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800829
Changli Gaod8d1f302010-06-10 23:31:35 -0700830 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800831 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700832 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800833 if (!err)
834 goto out2;
835 }
836
837 if (--attempts <= 0)
838 goto out2;
839
840 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700841 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800842 * released someone could insert this route. Relookup.
843 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700844 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800845 goto relookup;
846
847out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800848 if (reachable) {
849 reachable = 0;
850 goto restart_2;
851 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700852 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700853 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700855 rt->dst.lastuse = jiffies;
856 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700857
858 return rt;
859}
860
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800861static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500862 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700863{
David S. Miller4c9483b2011-03-12 16:22:43 -0500864 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700865}
866
Thomas Grafc71099a2006-08-04 23:20:06 -0700867void ip6_route_input(struct sk_buff *skb)
868{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000869 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900870 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700871 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500872 struct flowi6 fl6 = {
873 .flowi6_iif = skb->dev->ifindex,
874 .daddr = iph->daddr,
875 .saddr = iph->saddr,
876 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 .flowi6_mark = skb->mark,
878 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700879 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700880
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800881 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700882 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700883
David S. Miller4c9483b2011-03-12 16:22:43 -0500884 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700885}
886
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800887static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500888 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700889{
David S. Miller4c9483b2011-03-12 16:22:43 -0500890 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700891}
892
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700893struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500894 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700895{
896 int flags = 0;
897
David S. Miller4c9483b2011-03-12 16:22:43 -0500898 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700899 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700900
David S. Miller4c9483b2011-03-12 16:22:43 -0500901 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700902 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000903 else if (sk)
904 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700905
David S. Miller4c9483b2011-03-12 16:22:43 -0500906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907}
908
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900909EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
David S. Miller2774c132011-03-01 14:59:04 -0800911struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700912{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700913 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700914 struct dst_entry *new = NULL;
915
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700916 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700917 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700918 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919
Changli Gaod8d1f302010-06-10 23:31:35 -0700920 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700921
David S. Miller14e50e52007-05-24 18:17:54 -0700922 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800923 new->input = dst_discard;
924 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700925
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000926 if (dst_metrics_read_only(&ort->dst))
927 new->_metrics = ort->dst._metrics;
928 else
929 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700930 rt->rt6i_idev = ort->rt6i_idev;
931 if (rt->rt6i_idev)
932 in6_dev_hold(rt->rt6i_idev);
933 rt->rt6i_expires = 0;
934
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000935 rt->rt6i_gateway = ort->rt6i_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -0700936 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 rt->rt6i_metric = 0;
938
939 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940#ifdef CONFIG_IPV6_SUBTREES
941 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942#endif
943
944 dst_free(new);
945 }
946
David S. Miller69ead7a2011-03-01 14:45:33 -0800947 dst_release(dst_orig);
948 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700949}
David S. Miller14e50e52007-05-24 18:17:54 -0700950
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951/*
952 * Destination cache support functions
953 */
954
955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956{
957 struct rt6_info *rt;
958
959 rt = (struct rt6_info *) dst;
960
David S. Miller6431cbc2011-02-07 20:38:06 -0800961 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 if (!rt->rt6i_peer)
964 rt6_bind_peer(rt, 0);
965 rt->rt6i_peer_genid = rt6_peer_genid();
966 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800968 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 return NULL;
970}
971
972static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973{
974 struct rt6_info *rt = (struct rt6_info *) dst;
975
976 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000977 if (rt->rt6i_flags & RTF_CACHE) {
978 if (rt6_check_expired(rt)) {
979 ip6_del_rt(rt);
980 dst = NULL;
981 }
982 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000984 dst = NULL;
985 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000987 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988}
989
990static void ip6_link_failure(struct sk_buff *skb)
991{
992 struct rt6_info *rt;
993
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
Eric Dumazetadf30902009-06-02 05:19:30 +0000996 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (rt) {
998 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700999 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 rt->rt6i_flags |= RTF_EXPIRES;
1001 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 rt->rt6i_node->fn_sernum = -1;
1003 }
1004}
1005
1006static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007{
1008 struct rt6_info *rt6 = (struct rt6_info*)dst;
1009
1010 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 rt6->rt6i_flags |= RTF_MODIFIED;
1012 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001013 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001015 features |= RTAX_FEATURE_ALLFRAG;
1016 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 }
David S. Millerdefb3512010-12-08 21:16:57 -08001018 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020}
1021
David S. Miller0dbaee32010-12-13 12:52:14 -08001022static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023{
David S. Miller0dbaee32010-12-13 12:52:14 -08001024 struct net_device *dev = dst->dev;
1025 unsigned int mtu = dst_mtu(dst);
1026 struct net *net = dev_net(dev);
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029
Daniel Lezcano55786892008-03-04 13:47:47 -08001030 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001034 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 * rely only on pmtu discovery"
1038 */
1039 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 mtu = IPV6_MAXPLEN;
1041 return mtu;
1042}
1043
David S. Millerd33e4552010-12-14 13:01:14 -08001044static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045{
1046 unsigned int mtu = IPV6_MIN_MTU;
1047 struct inet6_dev *idev;
1048
1049 rcu_read_lock();
1050 idev = __in6_dev_get(dst->dev);
1051 if (idev)
1052 mtu = idev->cnf.mtu6;
1053 rcu_read_unlock();
1054
1055 return mtu;
1056}
1057
/*
 * Head of the singly linked list (chained through dst.next) of entries
 * created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;

/* Taken with spin_lock_bh() around every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001060
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001061struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001063 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064{
1065 struct rt6_info *rt;
1066 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001067 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
1069 if (unlikely(idev == NULL))
1070 return NULL;
1071
David S. Miller957c6652011-06-24 15:25:00 -07001072 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 if (unlikely(rt == NULL)) {
1074 in6_dev_put(idev);
1075 goto out;
1076 }
1077
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 if (neigh)
1079 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001080 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001082 if (IS_ERR(neigh))
1083 neigh = NULL;
1084 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001086 rt->dst.flags |= DST_HOST;
1087 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001088 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001089 atomic_set(&rt->dst.__refcnt, 1);
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001090 rt->rt6i_dst.addr = *addr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001091 rt->rt6i_dst.plen = 128;
1092 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001093 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001095 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001096 rt->dst.next = icmp6_dst_gc_list;
1097 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001098 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Daniel Lezcano55786892008-03-04 13:47:47 -08001100 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001103 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104}
1105
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001106int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001108 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001109 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001111 spin_lock_bh(&icmp6_dst_lock);
1112 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001113
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 while ((dst = *pprev) != NULL) {
1115 if (!atomic_read(&dst->__refcnt)) {
1116 *pprev = dst->next;
1117 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 } else {
1119 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001120 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 }
1122 }
1123
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001124 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001125
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001126 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127}
1128
David S. Miller1e493d12008-09-10 17:27:15 -07001129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130 void *arg)
1131{
1132 struct dst_entry *dst, **pprev;
1133
1134 spin_lock_bh(&icmp6_dst_lock);
1135 pprev = &icmp6_dst_gc_list;
1136 while ((dst = *pprev) != NULL) {
1137 struct rt6_info *rt = (struct rt6_info *) dst;
1138 if (func(rt, arg)) {
1139 *pprev = dst->next;
1140 dst_free(dst);
1141 } else {
1142 pprev = &dst->next;
1143 }
1144 }
1145 spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
Daniel Lezcano569d3642008-01-18 03:56:57 -08001148static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001151 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001152 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001157 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
Eric Dumazetfc66f952010-10-08 06:37:34 +00001159 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001160 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001161 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 goto out;
1163
Benjamin Thery6891a342008-03-04 13:49:47 -08001164 net->ipv6.ip6_rt_gc_expire++;
1165 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001167 entries = dst_entries_get_slow(ops);
1168 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001169 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001171 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001172 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173}
1174
/* Clean host part of a prefix. Not necessary in a radix tree,
   but it results in cleaner routing tables.

   Remove it only once everything else works!
 */
1180
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001181int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182{
David S. Miller5170ae82010-12-12 21:35:57 -08001183 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001184 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001185 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001186 struct inet6_dev *idev;
1187
1188 rcu_read_lock();
1189 idev = __in6_dev_get(dev);
1190 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001191 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001192 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001193 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001194 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 }
1196 return hoplimit;
1197}
David S. Millerabbf46a2010-12-12 21:14:46 -08001198EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199
1200/*
1201 *
1202 */
1203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205{
1206 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001207 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 struct rt6_info *rt = NULL;
1209 struct net_device *dev = NULL;
1210 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001211 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 int addr_type;
1213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 return -EINVAL;
1219#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001222 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 if (!dev)
1224 goto out;
1225 idev = in6_dev_get(dev);
1226 if (!idev)
1227 goto out;
1228 }
1229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230 if (cfg->fc_metric == 0)
1231 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232
Matti Vaittinend71314b2011-11-14 00:14:49 +00001233 err = -ENOBUFS;
1234 if (NULL != cfg->fc_nlinfo.nlh &&
1235 !(cfg->fc_nlinfo.nlh->nlmsg_flags&NLM_F_CREATE)) {
1236 table = fib6_get_table(net, cfg->fc_table);
1237 if (table == NULL) {
1238 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1239 table = fib6_new_table(net, cfg->fc_table);
1240 }
1241 } else {
1242 table = fib6_new_table(net, cfg->fc_table);
1243 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001244 if (table == NULL) {
Thomas Grafc71099a2006-08-04 23:20:06 -07001245 goto out;
1246 }
1247
David S. Miller957c6652011-06-24 15:25:00 -07001248 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
1250 if (rt == NULL) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254
Changli Gaod8d1f302010-06-10 23:31:35 -07001255 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001256 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1257 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1258 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259
Thomas Graf86872cb2006-08-22 00:01:08 -07001260 if (cfg->fc_protocol == RTPROT_UNSPEC)
1261 cfg->fc_protocol = RTPROT_BOOT;
1262 rt->rt6i_protocol = cfg->fc_protocol;
1263
1264 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
1266 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001267 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001268 else if (cfg->fc_flags & RTF_LOCAL)
1269 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001271 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272
Changli Gaod8d1f302010-06-10 23:31:35 -07001273 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
Thomas Graf86872cb2006-08-22 00:01:08 -07001275 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1276 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001278 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001280 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1281 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1282 if (!metrics) {
1283 err = -ENOMEM;
1284 goto out;
1285 }
1286 dst_init_metrics(&rt->dst, metrics, 0);
1287 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1290 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291#endif
1292
Thomas Graf86872cb2006-08-22 00:01:08 -07001293 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294
1295 /* We cannot add true routes via loopback here,
1296 they would result in kernel looping; promote them to reject routes
1297 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001298 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001299 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1300 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001302 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 if (dev) {
1304 dev_put(dev);
1305 in6_dev_put(idev);
1306 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001307 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 dev_hold(dev);
1309 idev = in6_dev_get(dev);
1310 if (!idev) {
1311 err = -ENODEV;
1312 goto out;
1313 }
1314 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001315 rt->dst.output = ip6_pkt_discard_out;
1316 rt->dst.input = ip6_pkt_discard;
1317 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1319 goto install_route;
1320 }
1321
Thomas Graf86872cb2006-08-22 00:01:08 -07001322 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001323 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 int gwa_type;
1325
Thomas Graf86872cb2006-08-22 00:01:08 -07001326 gw_addr = &cfg->fc_gateway;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001327 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 gwa_type = ipv6_addr_type(gw_addr);
1329
1330 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1331 struct rt6_info *grt;
1332
1333 /* IPv6 strictly inhibits using not link-local
1334 addresses as nexthop address.
1335 Otherwise, router will not able to send redirects.
1336 It is very good, but in some (rare!) circumstances
1337 (SIT, PtP, NBMA NOARP links) it is handy to allow
1338 some exceptions. --ANK
1339 */
1340 err = -EINVAL;
1341 if (!(gwa_type&IPV6_ADDR_UNICAST))
1342 goto out;
1343
Daniel Lezcano55786892008-03-04 13:47:47 -08001344 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345
1346 err = -EHOSTUNREACH;
1347 if (grt == NULL)
1348 goto out;
1349 if (dev) {
1350 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001351 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 goto out;
1353 }
1354 } else {
1355 dev = grt->rt6i_dev;
1356 idev = grt->rt6i_idev;
1357 dev_hold(dev);
1358 in6_dev_hold(grt->rt6i_idev);
1359 }
1360 if (!(grt->rt6i_flags&RTF_GATEWAY))
1361 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001362 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
1364 if (err)
1365 goto out;
1366 }
1367 err = -EINVAL;
1368 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1369 goto out;
1370 }
1371
1372 err = -ENODEV;
1373 if (dev == NULL)
1374 goto out;
1375
Daniel Walterc3968a82011-04-13 21:10:57 +00001376 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1377 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1378 err = -EINVAL;
1379 goto out;
1380 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001381 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00001382 rt->rt6i_prefsrc.plen = 128;
1383 } else
1384 rt->rt6i_prefsrc.plen = 0;
1385
Thomas Graf86872cb2006-08-22 00:01:08 -07001386 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001387 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1388 if (IS_ERR(n)) {
1389 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 goto out;
1391 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001392 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 }
1394
Thomas Graf86872cb2006-08-22 00:01:08 -07001395 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396
1397install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001398 if (cfg->fc_mx) {
1399 struct nlattr *nla;
1400 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401
Thomas Graf86872cb2006-08-22 00:01:08 -07001402 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001403 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001404
1405 if (type) {
1406 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 err = -EINVAL;
1408 goto out;
1409 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001410
David S. Millerdefb3512010-12-08 21:16:57 -08001411 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 }
1414 }
1415
Changli Gaod8d1f302010-06-10 23:31:35 -07001416 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001418 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001419
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001420 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001421
Thomas Graf86872cb2006-08-22 00:01:08 -07001422 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423
1424out:
1425 if (dev)
1426 dev_put(dev);
1427 if (idev)
1428 in6_dev_put(idev);
1429 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001430 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 return err;
1432}
1433
Thomas Graf86872cb2006-08-22 00:01:08 -07001434static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435{
1436 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001437 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001438 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001440 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001441 return -ENOENT;
1442
Thomas Grafc71099a2006-08-04 23:20:06 -07001443 table = rt->rt6i_table;
1444 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445
Thomas Graf86872cb2006-08-22 00:01:08 -07001446 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001447 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448
Thomas Grafc71099a2006-08-04 23:20:06 -07001449 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450
1451 return err;
1452}
1453
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001454int ip6_del_rt(struct rt6_info *rt)
1455{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001456 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001457 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001458 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001459 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001460}
1461
Thomas Graf86872cb2006-08-22 00:01:08 -07001462static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463{
Thomas Grafc71099a2006-08-04 23:20:06 -07001464 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 struct fib6_node *fn;
1466 struct rt6_info *rt;
1467 int err = -ESRCH;
1468
Daniel Lezcano55786892008-03-04 13:47:47 -08001469 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001470 if (table == NULL)
1471 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
Thomas Grafc71099a2006-08-04 23:20:06 -07001473 read_lock_bh(&table->tb6_lock);
1474
1475 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001476 &cfg->fc_dst, cfg->fc_dst_len,
1477 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001478
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001480 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001481 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001483 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001485 if (cfg->fc_flags & RTF_GATEWAY &&
1486 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001488 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001490 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001491 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492
Thomas Graf86872cb2006-08-22 00:01:08 -07001493 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 }
1495 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001496 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497
1498 return err;
1499}
1500
/*
 *	Handle redirects
 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001504struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001505 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001506 struct in6_addr gateway;
1507};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001509static struct rt6_info *__ip6_route_redirect(struct net *net,
1510 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001511 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001512 int flags)
1513{
David S. Miller4c9483b2011-03-12 16:22:43 -05001514 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001515 struct rt6_info *rt;
1516 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001517
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001519 * Get the "current" route for this destination and
1520 * check if the redirect has come from approriate router.
1521 *
1522 * RFC 2461 specifies that redirects should only be
1523 * accepted if they come from the nexthop to the target.
1524 * Due to the way the routes are chosen, this notion
1525 * is a bit fuzzy and one might need to check all possible
1526 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528
Thomas Grafc71099a2006-08-04 23:20:06 -07001529 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001530 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001531restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001532 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001533 /*
1534 * Current route is on-link; redirect is always invalid.
1535 *
1536 * Seems, previous statement is not true. It could
1537 * be node, which looks for us as on-link (f.e. proxy ndisc)
1538 * But then router serving it might decide, that we should
1539 * know truth 8)8) --ANK (980726).
1540 */
1541 if (rt6_check_expired(rt))
1542 continue;
1543 if (!(rt->rt6i_flags & RTF_GATEWAY))
1544 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001545 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001546 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001547 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001548 continue;
1549 break;
1550 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001551
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001552 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001553 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001554 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001555out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001556 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001557
1558 read_unlock_bh(&table->tb6_lock);
1559
1560 return rt;
1561};
1562
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001563static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1564 const struct in6_addr *src,
1565 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001566 struct net_device *dev)
1567{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001568 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001569 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001570 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001571 .fl6 = {
1572 .flowi6_oif = dev->ifindex,
1573 .daddr = *dest,
1574 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001575 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001576 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001577
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001578 rdfl.gateway = *gateway;
Brian Haley86c36ce2009-10-07 13:58:01 -07001579
Thomas Grafadaa70b2006-10-13 15:01:03 -07001580 if (rt6_need_strict(dest))
1581 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001582
David S. Miller4c9483b2011-03-12 16:22:43 -05001583 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001584 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001585}
1586
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001587void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1588 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001589 struct neighbour *neigh, u8 *lladdr, int on_link)
1590{
1591 struct rt6_info *rt, *nrt = NULL;
1592 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001593 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001594
1595 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1596
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001597 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 if (net_ratelimit())
1599 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1600 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001601 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 }
1603
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 /*
1605 * We have finally decided to accept it.
1606 */
1607
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001608 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1610 NEIGH_UPDATE_F_OVERRIDE|
1611 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1612 NEIGH_UPDATE_F_ISROUTER))
1613 );
1614
1615 /*
1616 * Redirect received -> path was valid.
1617 * Look, redirects are sent only in response to data packets,
1618 * so that this nexthop apparently is reachable. --ANK
1619 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001620 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621
1622 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001623 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 goto out;
1625
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001626 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 if (nrt == NULL)
1628 goto out;
1629
1630 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1631 if (on_link)
1632 nrt->rt6i_flags &= ~RTF_GATEWAY;
1633
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001634 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
David S. Miller69cce1d2011-07-17 23:09:49 -07001635 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636
Thomas Graf40e22e82006-08-22 00:00:45 -07001637 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 goto out;
1639
Changli Gaod8d1f302010-06-10 23:31:35 -07001640 netevent.old = &rt->dst;
1641 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001642 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1643
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001645 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 return;
1647 }
1648
1649out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001650 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651}
1652
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */
1657
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001658static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001659 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660{
1661 struct rt6_info *rt, *nrt;
1662 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001663again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001664 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 if (rt == NULL)
1666 return;
1667
Andrey Vagind3052b52010-12-11 15:20:11 +00001668 if (rt6_check_expired(rt)) {
1669 ip6_del_rt(rt);
1670 goto again;
1671 }
1672
Changli Gaod8d1f302010-06-10 23:31:35 -07001673 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 goto out;
1675
1676 if (pmtu < IPV6_MIN_MTU) {
1677 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001678 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 * MTU (1280) and a fragment header should always be included
1680 * after a node receiving Too Big message reporting PMTU is
1681 * less than the IPv6 Minimum Link MTU.
1682 */
1683 pmtu = IPV6_MIN_MTU;
1684 allfrag = 1;
1685 }
1686
1687 /* New mtu received -> path was valid.
1688 They are sent only in response to data packets,
1689 so that this nexthop apparently is reachable. --ANK
1690 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001691 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
1693 /* Host route. If it is static, it would be better
1694 not to override it, but add new one, so that
1695 when cache entry will expire old pmtu
1696 would return automatically.
1697 */
1698 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001699 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1700 if (allfrag) {
1701 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1702 features |= RTAX_FEATURE_ALLFRAG;
1703 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1704 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001705 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1707 goto out;
1708 }
1709
1710 /* Network route.
1711 Two cases are possible:
1712 1. It is connected route. Action: COW
1713 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1714 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001715 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001716 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001717 else
1718 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001719
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001720 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001721 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1722 if (allfrag) {
1723 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1724 features |= RTAX_FEATURE_ALLFRAG;
1725 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1726 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001727
1728 /* According to RFC 1981, detecting PMTU increase shouldn't be
1729 * happened within 5 mins, the recommended timer is 10 mins.
1730 * Here this route expiration time is set to ip6_rt_mtu_expires
1731 * which is 10 mins. After 10 mins the decreased pmtu is expired
1732 * and detecting PMTU increase will be automatically happened.
1733 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001734 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001735 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1736
Thomas Graf40e22e82006-08-22 00:00:45 -07001737 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001740 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741}
1742
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001743void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001744 struct net_device *dev, u32 pmtu)
1745{
1746 struct net *net = dev_net(dev);
1747
1748 /*
1749 * RFC 1981 states that a node "MUST reduce the size of the packets it
1750 * is sending along the path" that caused the Packet Too Big message.
1751 * Since it's not possible in the general case to determine which
1752 * interface was used to send the original packet, we update the MTU
1753 * on the interface that will be used to send future packets. We also
1754 * update the MTU on the interface that received the Packet Too Big in
1755 * case the original packet was forced out that interface with
1756 * SO_BINDTODEVICE or similar. This is the next best thing to the
1757 * correct behaviour, which would be to update the MTU on all
1758 * interfaces.
1759 */
1760 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1761 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1762}
1763
/*
 *	Misc support functions
 */
1767
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001768static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1769 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001771 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001772 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001773 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774
1775 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001776 rt->dst.input = ort->dst.input;
1777 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001778 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001780 rt->rt6i_dst.addr = *dest;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001781 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001782 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001783 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 rt->rt6i_idev = ort->rt6i_idev;
1785 if (rt->rt6i_idev)
1786 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001787 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 rt->rt6i_expires = 0;
1789
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001790 rt->rt6i_gateway = ort->rt6i_gateway;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1792 rt->rt6i_metric = 0;
1793
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794#ifdef CONFIG_IPV6_SUBTREES
1795 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1796#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001797 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001798 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799 }
1800 return rt;
1801}
1802
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001803#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001804static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001805 const struct in6_addr *prefix, int prefixlen,
1806 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001807{
1808 struct fib6_node *fn;
1809 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001810 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001811
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001812 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001813 if (table == NULL)
1814 return NULL;
1815
1816 write_lock_bh(&table->tb6_lock);
1817 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001818 if (!fn)
1819 goto out;
1820
Changli Gaod8d1f302010-06-10 23:31:35 -07001821 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001822 if (rt->rt6i_dev->ifindex != ifindex)
1823 continue;
1824 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1825 continue;
1826 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1827 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001828 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001829 break;
1830 }
1831out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001832 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001833 return rt;
1834}
1835
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001836static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001837 const struct in6_addr *prefix, int prefixlen,
1838 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001839 unsigned pref)
1840{
Thomas Graf86872cb2006-08-22 00:01:08 -07001841 struct fib6_config cfg = {
1842 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001843 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001844 .fc_ifindex = ifindex,
1845 .fc_dst_len = prefixlen,
1846 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1847 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001848 .fc_nlinfo.pid = 0,
1849 .fc_nlinfo.nlh = NULL,
1850 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001853 cfg.fc_dst = *prefix;
1854 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07001855
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001856 /* We should treat it as a default route if prefix length is 0. */
1857 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001858 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001859
Thomas Graf86872cb2006-08-22 00:01:08 -07001860 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001861
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001862 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001863}
1864#endif
1865
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001866struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001867{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001869 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001871 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001872 if (table == NULL)
1873 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874
Thomas Grafc71099a2006-08-04 23:20:06 -07001875 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001876 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001878 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1880 break;
1881 }
1882 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001883 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001884 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 return rt;
1886}
1887
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001888struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001889 struct net_device *dev,
1890 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891{
Thomas Graf86872cb2006-08-22 00:01:08 -07001892 struct fib6_config cfg = {
1893 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001894 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001895 .fc_ifindex = dev->ifindex,
1896 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1897 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001898 .fc_nlinfo.pid = 0,
1899 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001900 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001901 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001903 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904
Thomas Graf86872cb2006-08-22 00:01:08 -07001905 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907 return rt6_get_dflt_router(gwaddr, dev);
1908}
1909
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001910void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911{
1912 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001913 struct fib6_table *table;
1914
1915 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001916 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001917 if (table == NULL)
1918 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
1920restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001921 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001922 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001924 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001925 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001926 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 goto restart;
1928 }
1929 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001930 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931}
1932
Daniel Lezcano55786892008-03-04 13:47:47 -08001933static void rtmsg_to_fib6_config(struct net *net,
1934 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001935 struct fib6_config *cfg)
1936{
1937 memset(cfg, 0, sizeof(*cfg));
1938
1939 cfg->fc_table = RT6_TABLE_MAIN;
1940 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1941 cfg->fc_metric = rtmsg->rtmsg_metric;
1942 cfg->fc_expires = rtmsg->rtmsg_info;
1943 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1944 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1945 cfg->fc_flags = rtmsg->rtmsg_flags;
1946
Daniel Lezcano55786892008-03-04 13:47:47 -08001947 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001948
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001949 cfg->fc_dst = rtmsg->rtmsg_dst;
1950 cfg->fc_src = rtmsg->rtmsg_src;
1951 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07001952}
1953
Daniel Lezcano55786892008-03-04 13:47:47 -08001954int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955{
Thomas Graf86872cb2006-08-22 00:01:08 -07001956 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 struct in6_rtmsg rtmsg;
1958 int err;
1959
1960 switch(cmd) {
1961 case SIOCADDRT: /* Add a route */
1962 case SIOCDELRT: /* Delete a route */
1963 if (!capable(CAP_NET_ADMIN))
1964 return -EPERM;
1965 err = copy_from_user(&rtmsg, arg,
1966 sizeof(struct in6_rtmsg));
1967 if (err)
1968 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001969
Daniel Lezcano55786892008-03-04 13:47:47 -08001970 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001971
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 rtnl_lock();
1973 switch (cmd) {
1974 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001975 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 break;
1977 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001978 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 break;
1980 default:
1981 err = -EINVAL;
1982 }
1983 rtnl_unlock();
1984
1985 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987
1988 return -EINVAL;
1989}
1990
/*
 *	Drop the packet on the floor
 */
1994
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001995static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001997 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001998 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001999 switch (ipstats_mib_noroutes) {
2000 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07002001 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00002002 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002003 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2004 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002005 break;
2006 }
2007 /* FALLTHROUGH */
2008 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002009 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2010 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002011 break;
2012 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002013 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 kfree_skb(skb);
2015 return 0;
2016}
2017
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002018static int ip6_pkt_discard(struct sk_buff *skb)
2019{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002020 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002021}
2022
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002023static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024{
Eric Dumazetadf30902009-06-02 05:19:30 +00002025 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002026 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027}
2028
David S. Miller6723ab52006-10-18 21:20:57 -07002029#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2030
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002031static int ip6_pkt_prohibit(struct sk_buff *skb)
2032{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002033 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002034}
2035
2036static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2037{
Eric Dumazetadf30902009-06-02 05:19:30 +00002038 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002039 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002040}
2041
David S. Miller6723ab52006-10-18 21:20:57 -07002042#endif
2043
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044/*
2045 * Allocate a dst for local (unicast / anycast) address.
2046 */
2047
2048struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2049 const struct in6_addr *addr,
2050 int anycast)
2051{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002052 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002053 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002054 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002055 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056
Ben Greear40385652010-11-08 12:33:48 +00002057 if (rt == NULL) {
2058 if (net_ratelimit())
2059 pr_warning("IPv6: Maximum number of routes reached,"
2060 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 in6_dev_hold(idev);
2065
David S. Miller11d53b42011-06-24 15:23:34 -07002066 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002067 rt->dst.input = ip6_input;
2068 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002070 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071
2072 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002073 if (anycast)
2074 rt->rt6i_flags |= RTF_ANYCAST;
2075 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002077 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2078 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002079 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002080
David S. Miller29546a62011-03-03 12:10:37 -08002081 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002083 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002085 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002087 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088
Changli Gaod8d1f302010-06-10 23:31:35 -07002089 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090
2091 return rt;
2092}
2093
Daniel Walterc3968a82011-04-13 21:10:57 +00002094int ip6_route_get_saddr(struct net *net,
2095 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002096 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002097 unsigned int prefs,
2098 struct in6_addr *saddr)
2099{
2100 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2101 int err = 0;
2102 if (rt->rt6i_prefsrc.plen)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002103 *saddr = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002104 else
2105 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2106 daddr, prefs, saddr);
2107 return err;
2108}
2109
2110/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address compared with rt6i_prefsrc */
};
2116
2117static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2118{
2119 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2120 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2121 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2122
2123 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2124 rt != net->ipv6.ip6_null_entry &&
2125 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2126 /* remove prefsrc entry */
2127 rt->rt6i_prefsrc.plen = 0;
2128 }
2129 return 0;
2130}
2131
2132void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2133{
2134 struct net *net = dev_net(ifp->idev->dev);
2135 struct arg_dev_net_ip adni = {
2136 .dev = ifp->idev->dev,
2137 .net = net,
2138 .addr = &ifp->addr,
2139 };
2140 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2141}
2142
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
};
2147
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148static int fib6_ifdown(struct rt6_info *rt, void *arg)
2149{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002150 const struct arg_dev_net *adn = arg;
2151 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002152
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002153 if ((rt->rt6i_dev == dev || dev == NULL) &&
2154 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 RT6_TRACE("deleted by ifdown %p\n", rt);
2156 return -1;
2157 }
2158 return 0;
2159}
2160
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002161void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002163 struct arg_dev_net adn = {
2164 .dev = dev,
2165 .net = net,
2166 };
2167
2168 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002169 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170}
2171
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2177
2178static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2179{
2180 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2181 struct inet6_dev *idev;
2182
2183 /* In IPv6 pmtu discovery is not optional,
2184 so that RTAX_MTU lock cannot disable it.
2185 We still use this lock to block changes
2186 caused by addrconf/ndisc.
2187 */
2188
2189 idev = __in6_dev_get(arg->dev);
2190 if (idev == NULL)
2191 return 0;
2192
2193 /* For administrative MTU increase, there is no way to discover
2194 IPv6 PMTU increase, so PMTU increase should be updated here.
2195 Since RFC 1981 doesn't include administrative MTU increase
2196 update PMTU increase is a MUST. (i.e. jumbo frame)
2197 */
2198 /*
2199 If new MTU is less than route PMTU, this new MTU will be the
2200 lowest MTU in the path, update the route PMTU to reflect PMTU
2201 decreases; if new MTU is greater than route PMTU, and the
2202 old MTU is the lowest MTU in the path, update the route PMTU
2203 to reflect the increase. In this case if the other nodes' MTU
2204 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2205 PMTU discouvery.
2206 */
2207 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002208 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2209 (dst_mtu(&rt->dst) >= arg->mtu ||
2210 (dst_mtu(&rt->dst) < arg->mtu &&
2211 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002212 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002213 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 return 0;
2215}
2216
2217void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2218{
Thomas Grafc71099a2006-08-04 23:20:06 -07002219 struct rt6_mtu_change_arg arg = {
2220 .dev = dev,
2221 .mtu = mtu,
2222 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002224 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
2226
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002227static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002228 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002229 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002230 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002231 [RTA_PRIORITY] = { .type = NLA_U32 },
2232 [RTA_METRICS] = { .type = NLA_NESTED },
2233};
2234
2235static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2236 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237{
Thomas Graf86872cb2006-08-22 00:01:08 -07002238 struct rtmsg *rtm;
2239 struct nlattr *tb[RTA_MAX+1];
2240 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241
Thomas Graf86872cb2006-08-22 00:01:08 -07002242 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2243 if (err < 0)
2244 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245
Thomas Graf86872cb2006-08-22 00:01:08 -07002246 err = -EINVAL;
2247 rtm = nlmsg_data(nlh);
2248 memset(cfg, 0, sizeof(*cfg));
2249
2250 cfg->fc_table = rtm->rtm_table;
2251 cfg->fc_dst_len = rtm->rtm_dst_len;
2252 cfg->fc_src_len = rtm->rtm_src_len;
2253 cfg->fc_flags = RTF_UP;
2254 cfg->fc_protocol = rtm->rtm_protocol;
2255
2256 if (rtm->rtm_type == RTN_UNREACHABLE)
2257 cfg->fc_flags |= RTF_REJECT;
2258
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002259 if (rtm->rtm_type == RTN_LOCAL)
2260 cfg->fc_flags |= RTF_LOCAL;
2261
Thomas Graf86872cb2006-08-22 00:01:08 -07002262 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2263 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002264 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002265
2266 if (tb[RTA_GATEWAY]) {
2267 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2268 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002270
2271 if (tb[RTA_DST]) {
2272 int plen = (rtm->rtm_dst_len + 7) >> 3;
2273
2274 if (nla_len(tb[RTA_DST]) < plen)
2275 goto errout;
2276
2277 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002279
2280 if (tb[RTA_SRC]) {
2281 int plen = (rtm->rtm_src_len + 7) >> 3;
2282
2283 if (nla_len(tb[RTA_SRC]) < plen)
2284 goto errout;
2285
2286 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002288
Daniel Walterc3968a82011-04-13 21:10:57 +00002289 if (tb[RTA_PREFSRC])
2290 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2291
Thomas Graf86872cb2006-08-22 00:01:08 -07002292 if (tb[RTA_OIF])
2293 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2294
2295 if (tb[RTA_PRIORITY])
2296 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2297
2298 if (tb[RTA_METRICS]) {
2299 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2300 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002302
2303 if (tb[RTA_TABLE])
2304 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2305
2306 err = 0;
2307errout:
2308 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309}
2310
Thomas Grafc127ea22007-03-22 11:58:32 -07002311static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312{
Thomas Graf86872cb2006-08-22 00:01:08 -07002313 struct fib6_config cfg;
2314 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315
Thomas Graf86872cb2006-08-22 00:01:08 -07002316 err = rtm_to_fib6_config(skb, nlh, &cfg);
2317 if (err < 0)
2318 return err;
2319
2320 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321}
2322
Thomas Grafc127ea22007-03-22 11:58:32 -07002323static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324{
Thomas Graf86872cb2006-08-22 00:01:08 -07002325 struct fib6_config cfg;
2326 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327
Thomas Graf86872cb2006-08-22 00:01:08 -07002328 err = rtm_to_fib6_config(skb, nlh, &cfg);
2329 if (err < 0)
2330 return err;
2331
2332 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333}
2334
Thomas Graf339bf982006-11-10 14:10:15 -08002335static inline size_t rt6_nlmsg_size(void)
2336{
2337 return NLMSG_ALIGN(sizeof(struct rtmsg))
2338 + nla_total_size(16) /* RTA_SRC */
2339 + nla_total_size(16) /* RTA_DST */
2340 + nla_total_size(16) /* RTA_GATEWAY */
2341 + nla_total_size(16) /* RTA_PREFSRC */
2342 + nla_total_size(4) /* RTA_TABLE */
2343 + nla_total_size(4) /* RTA_IIF */
2344 + nla_total_size(4) /* RTA_OIF */
2345 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002346 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002347 + nla_total_size(sizeof(struct rta_cacheinfo));
2348}
2349
Brian Haley191cd582008-08-14 15:33:21 -07002350static int rt6_fill_node(struct net *net,
2351 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002352 struct in6_addr *dst, struct in6_addr *src,
2353 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002354 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355{
2356 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002357 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002358 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002359 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002360 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361
2362 if (prefix) { /* user wants prefix routes only */
2363 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2364 /* success since this is not a prefix route */
2365 return 1;
2366 }
2367 }
2368
Thomas Graf2d7202b2006-08-22 00:01:27 -07002369 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2370 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002371 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002372
2373 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 rtm->rtm_family = AF_INET6;
2375 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2376 rtm->rtm_src_len = rt->rt6i_src.plen;
2377 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002378 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002379 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002380 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002381 table = RT6_TABLE_UNSPEC;
2382 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002383 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 if (rt->rt6i_flags&RTF_REJECT)
2385 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002386 else if (rt->rt6i_flags&RTF_LOCAL)
2387 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2389 rtm->rtm_type = RTN_LOCAL;
2390 else
2391 rtm->rtm_type = RTN_UNICAST;
2392 rtm->rtm_flags = 0;
2393 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2394 rtm->rtm_protocol = rt->rt6i_protocol;
2395 if (rt->rt6i_flags&RTF_DYNAMIC)
2396 rtm->rtm_protocol = RTPROT_REDIRECT;
2397 else if (rt->rt6i_flags & RTF_ADDRCONF)
2398 rtm->rtm_protocol = RTPROT_KERNEL;
2399 else if (rt->rt6i_flags&RTF_DEFAULT)
2400 rtm->rtm_protocol = RTPROT_RA;
2401
2402 if (rt->rt6i_flags&RTF_CACHE)
2403 rtm->rtm_flags |= RTM_F_CLONED;
2404
2405 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002406 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002407 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002409 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410#ifdef CONFIG_IPV6_SUBTREES
2411 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002412 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002413 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002415 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002417 if (iif) {
2418#ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002420 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432#endif
2433 NLA_PUT_U32(skb, RTA_IIF, iif);
2434 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002436 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002437 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002439
Daniel Walterc3968a82011-04-13 21:10:57 +00002440 if (rt->rt6i_prefsrc.plen) {
2441 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002442 saddr_buf = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002443 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2444 }
2445
David S. Millerdefb3512010-12-08 21:16:57 -08002446 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002447 goto nla_put_failure;
2448
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002449 rcu_read_lock();
2450 n = dst_get_neighbour(&rt->dst);
2451 if (n)
2452 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2453 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002454
Changli Gaod8d1f302010-06-10 23:31:35 -07002455 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002456 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2457
2458 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002459
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002460 if (!(rt->rt6i_flags & RTF_EXPIRES))
2461 expires = 0;
2462 else if (rt->rt6i_expires - jiffies < INT_MAX)
2463 expires = rt->rt6i_expires - jiffies;
2464 else
2465 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002466
Changli Gaod8d1f302010-06-10 23:31:35 -07002467 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2468 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002469 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470
Thomas Graf2d7202b2006-08-22 00:01:27 -07002471 return nlmsg_end(skb, nlh);
2472
2473nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002474 nlmsg_cancel(skb, nlh);
2475 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476}
2477
Patrick McHardy1b43af52006-08-10 23:11:17 -07002478int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479{
2480 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2481 int prefix;
2482
Thomas Graf2d7202b2006-08-22 00:01:27 -07002483 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2484 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2486 } else
2487 prefix = 0;
2488
Brian Haley191cd582008-08-14 15:33:21 -07002489 return rt6_fill_node(arg->net,
2490 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002492 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493}
2494
Thomas Grafc127ea22007-03-22 11:58:32 -07002495static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002497 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002498 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002500 struct sk_buff *skb;
2501 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002502 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002503 int err, iif = 0;
2504
2505 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2506 if (err < 0)
2507 goto errout;
2508
2509 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002510 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002511
2512 if (tb[RTA_SRC]) {
2513 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2514 goto errout;
2515
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002516 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07002517 }
2518
2519 if (tb[RTA_DST]) {
2520 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2521 goto errout;
2522
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002523 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07002524 }
2525
2526 if (tb[RTA_IIF])
2527 iif = nla_get_u32(tb[RTA_IIF]);
2528
2529 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002530 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002531
2532 if (iif) {
2533 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002534 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002535 if (!dev) {
2536 err = -ENODEV;
2537 goto errout;
2538 }
2539 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540
2541 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002542 if (skb == NULL) {
2543 err = -ENOBUFS;
2544 goto errout;
2545 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546
2547 /* Reserve room for dummy headers, this skb can pass
2548 through good chunk of routing engine.
2549 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002550 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2552
David S. Miller4c9483b2011-03-12 16:22:43 -05002553 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002554 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555
David S. Miller4c9483b2011-03-12 16:22:43 -05002556 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002558 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002560 kfree_skb(skb);
2561 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 }
2563
Daniel Lezcano55786892008-03-04 13:47:47 -08002564 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002565errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567}
2568
Thomas Graf86872cb2006-08-22 00:01:08 -07002569void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570{
2571 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002572 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002573 u32 seq;
2574 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002576 err = -ENOBUFS;
2577 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002578
Thomas Graf339bf982006-11-10 14:10:15 -08002579 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002580 if (skb == NULL)
2581 goto errout;
2582
Brian Haley191cd582008-08-14 15:33:21 -07002583 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002584 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002585 if (err < 0) {
2586 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2587 WARN_ON(err == -EMSGSIZE);
2588 kfree_skb(skb);
2589 goto errout;
2590 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002591 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2592 info->nlh, gfp_any());
2593 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002594errout:
2595 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002596 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597}
2598
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002599static int ip6_route_dev_notify(struct notifier_block *this,
2600 unsigned long event, void *data)
2601{
2602 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002603 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002604
2605 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002606 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002607 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2608#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002609 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002610 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002611 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002612 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2613#endif
2614 }
2615
2616 return NOTIFY_OK;
2617}
2618
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619/*
2620 * /proc
2621 */
2622
2623#ifdef CONFIG_PROC_FS
2624
/* NOTE(review): not referenced by the /proc code visible here - confirm
 * whether it is still used elsewhere before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2633
2634static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2635{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002636 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002637 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002639 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640
2641#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002642 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002644 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002645#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002646 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002647 n = dst_get_neighbour(&rt->dst);
2648 if (n) {
2649 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002650 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002651 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002653 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002654 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002655 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2656 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002657 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658 return 0;
2659}
2660
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002661static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002663 struct net *net = (struct net *)m->private;
2664 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002665 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002666}
2667
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002668static int ipv6_route_open(struct inode *inode, struct file *file)
2669{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002670 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002671}
2672
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002673static const struct file_operations ipv6_route_proc_fops = {
2674 .owner = THIS_MODULE,
2675 .open = ipv6_route_open,
2676 .read = seq_read,
2677 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002678 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002679};
2680
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2682{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002683 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002684 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002685 net->ipv6.rt6_stats->fib_nodes,
2686 net->ipv6.rt6_stats->fib_route_nodes,
2687 net->ipv6.rt6_stats->fib_rt_alloc,
2688 net->ipv6.rt6_stats->fib_rt_entries,
2689 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002690 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002691 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692
2693 return 0;
2694}
2695
2696static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2697{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002698 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002699}
2700
Arjan van de Ven9a321442007-02-12 00:55:35 -08002701static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702 .owner = THIS_MODULE,
2703 .open = rt6_stats_seq_open,
2704 .read = seq_read,
2705 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002706 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002707};
2708#endif /* CONFIG_PROC_FS */
2709
2710#ifdef CONFIG_SYSCTL
2711
Linus Torvalds1da177e2005-04-16 15:20:36 -07002712static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002713int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714 void __user *buffer, size_t *lenp, loff_t *ppos)
2715{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002716 struct net *net;
2717 int delay;
2718 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002720
2721 net = (struct net *)ctl->extra1;
2722 delay = net->ipv6.sysctl.flush_delay;
2723 proc_dointvec(ctl, write, buffer, lenp, ppos);
2724 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2725 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726}
2727
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002728ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002729 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002730 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002731 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002732 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002733 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002734 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735 },
2736 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002738 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739 .maxlen = sizeof(int),
2740 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002741 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002742 },
2743 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002745 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002746 .maxlen = sizeof(int),
2747 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002748 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 },
2750 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002752 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753 .maxlen = sizeof(int),
2754 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002755 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 },
2757 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002759 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002760 .maxlen = sizeof(int),
2761 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002762 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763 },
2764 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002766 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767 .maxlen = sizeof(int),
2768 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002769 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 },
2771 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002773 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002774 .maxlen = sizeof(int),
2775 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002776 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 },
2778 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002779 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002780 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002781 .maxlen = sizeof(int),
2782 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002783 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 },
2785 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002786 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002787 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788 .maxlen = sizeof(int),
2789 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002790 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791 },
2792 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002793 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002795 .maxlen = sizeof(int),
2796 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002797 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002799 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002800};
2801
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002802struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002803{
2804 struct ctl_table *table;
2805
2806 table = kmemdup(ipv6_route_table_template,
2807 sizeof(ipv6_route_table_template),
2808 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002809
2810 if (table) {
2811 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002812 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002813 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002814 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2815 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2816 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2817 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2818 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2819 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2820 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002821 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002822 }
2823
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002824 return table;
2825}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002826#endif
2827
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002828static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002829{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002830 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002831
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002832 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2833 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002834
Eric Dumazetfc66f952010-10-08 06:37:34 +00002835 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2836 goto out_ip6_dst_ops;
2837
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002838 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2839 sizeof(*net->ipv6.ip6_null_entry),
2840 GFP_KERNEL);
2841 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002842 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002843 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002844 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002845 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002846 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2847 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002848
2849#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2850 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2851 sizeof(*net->ipv6.ip6_prohibit_entry),
2852 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002853 if (!net->ipv6.ip6_prohibit_entry)
2854 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002855 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002856 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002857 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002858 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2859 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002860
2861 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2862 sizeof(*net->ipv6.ip6_blk_hole_entry),
2863 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002864 if (!net->ipv6.ip6_blk_hole_entry)
2865 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002866 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002867 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002868 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002869 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2870 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002871#endif
2872
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002873 net->ipv6.sysctl.flush_delay = 0;
2874 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2875 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2876 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2877 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2878 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2879 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2880 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2881
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002882#ifdef CONFIG_PROC_FS
2883 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2884 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2885#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002886 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2887
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002888 ret = 0;
2889out:
2890 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002891
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002892#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2893out_ip6_prohibit_entry:
2894 kfree(net->ipv6.ip6_prohibit_entry);
2895out_ip6_null_entry:
2896 kfree(net->ipv6.ip6_null_entry);
2897#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002898out_ip6_dst_entries:
2899 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002900out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002901 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002902}
2903
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002904static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002905{
2906#ifdef CONFIG_PROC_FS
2907 proc_net_remove(net, "ipv6_route");
2908 proc_net_remove(net, "rt6_stats");
2909#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002910 kfree(net->ipv6.ip6_null_entry);
2911#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2912 kfree(net->ipv6.ip6_prohibit_entry);
2913 kfree(net->ipv6.ip6_blk_hole_entry);
2914#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002915 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002916}
2917
2918static struct pernet_operations ip6_route_net_ops = {
2919 .init = ip6_route_net_init,
2920 .exit = ip6_route_net_exit,
2921};
2922
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002923static struct notifier_block ip6_route_dev_notifier = {
2924 .notifier_call = ip6_route_dev_notify,
2925 .priority = 0,
2926};
2927
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002928int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002929{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002930 int ret;
2931
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002932 ret = -ENOMEM;
2933 ip6_dst_ops_template.kmem_cachep =
2934 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2935 SLAB_HWCACHE_ALIGN, NULL);
2936 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002937 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002938
Eric Dumazetfc66f952010-10-08 06:37:34 +00002939 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002940 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002941 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002942
Eric Dumazetfc66f952010-10-08 06:37:34 +00002943 ret = register_pernet_subsys(&ip6_route_net_ops);
2944 if (ret)
2945 goto out_dst_entries;
2946
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002947 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2948
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002949 /* Registering of the loopback is done before this portion of code,
2950 * the loopback reference in rt6_info will not be taken, do it
2951 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002952 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002953 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2954 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002955 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002956 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002957 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002958 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2959 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002960 ret = fib6_init();
2961 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002962 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002963
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002964 ret = xfrm6_init();
2965 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002966 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002967
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002968 ret = fib6_rules_init();
2969 if (ret)
2970 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002971
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002972 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002973 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2974 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2975 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002976 goto fib6_rules_init;
2977
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002978 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002979 if (ret)
2980 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002981
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002982out:
2983 return ret;
2984
2985fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002986 fib6_rules_cleanup();
2987xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002988 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002989out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002990 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002991out_register_subsys:
2992 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002993out_dst_entries:
2994 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002995out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002996 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002997 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998}
2999
3000void ip6_route_cleanup(void)
3001{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003002 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07003003 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003005 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003006 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00003007 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003008 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003009}