blob: e8987da06667225e44dae0cbfaf7fc51490788d7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 (or more) to get
 * tracing: RDBG() and RT6_TRACE() then expand to printk calls.  Below
 * that level both macros expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Eric Dumazet21efcfa2011-07-19 20:18:36 +000075static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080078static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080079static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080084static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080092static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000093 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080095 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080096static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000097 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080099#endif
100
David S. Miller06582542011-01-27 14:58:42 -0800101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
107 if (!rt->rt6i_peer)
108 rt6_bind_peer(rt, 1);
109
110 peer = rt->rt6i_peer;
111 if (peer) {
112 u32 *old_p = __DST_METRICS_PTR(old);
113 unsigned long prev, new;
114
115 p = peer->metrics;
116 if (inet_metrics_new(peer))
117 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118
119 new = (unsigned long) p;
120 prev = cmpxchg(&dst->_metrics, old, new);
121
122 if (prev != old) {
123 p = __DST_METRICS_PTR(prev);
124 if (prev & DST_METRICS_READ_ONLY)
125 p = NULL;
126 }
127 }
128 return p;
129}
130
David S. Millerd3aaeb32011-07-18 00:40:17 -0700131static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
132{
133 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
134}
135
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800136static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800138 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 .gc = ip6_dst_gc,
140 .gc_thresh = 1024,
141 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800142 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800143 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800144 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 .destroy = ip6_dst_destroy,
146 .ifdown = ip6_dst_ifdown,
147 .negative_advice = ip6_negative_advice,
148 .link_failure = ip6_link_failure,
149 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700150 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700151 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152};
153
/* default_mtu callback of ip6_dst_blackhole_ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
158
David S. Miller14e50e52007-05-24 18:17:54 -0700159static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
160{
161}
162
Held Bernhard0972ddb2011-04-24 22:07:32 +0000163static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
164 unsigned long old)
165{
166 return NULL;
167}
168
David S. Miller14e50e52007-05-24 18:17:54 -0700169static struct dst_ops ip6_dst_blackhole_ops = {
170 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800171 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700172 .destroy = ip6_dst_destroy,
173 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800174 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800175 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700176 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000177 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700178 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700179};
180
David S. Miller62fa8a82011-01-26 20:51:05 -0800181static const u32 ip6_template_metrics[RTAX_MAX] = {
182 [RTAX_HOPLIMIT - 1] = 255,
183};
184
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800185static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700186 .dst = {
187 .__refcnt = ATOMIC_INIT(1),
188 .__use = 1,
189 .obsolete = -1,
190 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700191 .input = ip6_pkt_discard,
192 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 },
194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700195 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 .rt6i_metric = ~(u32) 0,
197 .rt6i_ref = ATOMIC_INIT(1),
198};
199
Thomas Graf101367c2006-08-04 03:39:02 -0700200#ifdef CONFIG_IPV6_MULTIPLE_TABLES
201
David S. Miller6723ab52006-10-18 21:20:57 -0700202static int ip6_pkt_prohibit(struct sk_buff *skb);
203static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700204
Adrian Bunk280a34c2008-04-21 02:29:32 -0700205static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EACCES,
Changli Gaod8d1f302010-06-10 23:31:35 -0700211 .input = ip6_pkt_prohibit,
212 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700215 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218};
219
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800220static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700221 .dst = {
222 .__refcnt = ATOMIC_INIT(1),
223 .__use = 1,
224 .obsolete = -1,
225 .error = -EINVAL,
Changli Gaod8d1f302010-06-10 23:31:35 -0700226 .input = dst_discard,
227 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700228 },
229 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700230 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700231 .rt6i_metric = ~(u32) 0,
232 .rt6i_ref = ATOMIC_INIT(1),
233};
234
235#endif
236
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700238static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700239 struct net_device *dev,
240 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241{
David S. Miller957c6652011-06-24 15:25:00 -0700242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700243
244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
245
246 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
249static void ip6_dst_destroy(struct dst_entry *dst)
250{
251 struct rt6_info *rt = (struct rt6_info *)dst;
252 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800253 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254
255 if (idev != NULL) {
256 rt->rt6i_idev = NULL;
257 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900258 }
David S. Millerb3419362010-11-30 12:27:11 -0800259 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800260 rt->rt6i_peer = NULL;
261 inet_putpeer(peer);
262 }
263}
264
David S. Miller6431cbc2011-02-07 20:38:06 -0800265static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
266
267static u32 rt6_peer_genid(void)
268{
269 return atomic_read(&__rt6_peer_genid);
270}
271
David S. Millerb3419362010-11-30 12:27:11 -0800272void rt6_bind_peer(struct rt6_info *rt, int create)
273{
274 struct inet_peer *peer;
275
David S. Millerb3419362010-11-30 12:27:11 -0800276 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
277 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
278 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800279 else
280 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281}
282
283static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 int how)
285{
286 struct rt6_info *rt = (struct rt6_info *)dst;
287 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800288 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900289 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800291 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
292 struct inet6_dev *loopback_idev =
293 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 if (loopback_idev != NULL) {
295 rt->rt6i_idev = loopback_idev;
296 in6_dev_put(idev);
297 }
298 }
299}
300
301static __inline__ int rt6_check_expired(const struct rt6_info *rt)
302{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000303 return (rt->rt6i_flags & RTF_EXPIRES) &&
304 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305}
306
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000307static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700308{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000309 return ipv6_addr_type(daddr) &
310 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700311}
312
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700314 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 */
316
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800317static inline struct rt6_info *rt6_device_match(struct net *net,
318 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000319 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700321 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322{
323 struct rt6_info *local = NULL;
324 struct rt6_info *sprt;
325
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900326 if (!oif && ipv6_addr_any(saddr))
327 goto out;
328
Changli Gaod8d1f302010-06-10 23:31:35 -0700329 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900330 struct net_device *dev = sprt->rt6i_dev;
331
332 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 if (dev->ifindex == oif)
334 return sprt;
335 if (dev->flags & IFF_LOOPBACK) {
336 if (sprt->rt6i_idev == NULL ||
337 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700338 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900340 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 local->rt6i_idev->dev->ifindex == oif))
342 continue;
343 }
344 local = sprt;
345 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900346 } else {
347 if (ipv6_chk_addr(net, saddr, dev,
348 flags & RT6_LOOKUP_F_IFACE))
349 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900351 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900353 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 if (local)
355 return local;
356
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700357 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800358 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 return rt;
362}
363
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and more
 * than the interface's rtr_probe_interval has passed since
 * neigh->updated, stamp neigh->updated with the current time and send a
 * neighbour solicitation for the neighbour's address to its
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The interval check and the timestamp update therefore happen
 * under the neighbour's write lock: neigh->updated is modified here, and
 * with only a read lock two CPUs could both pass the check and both send
 * a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800400 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700402static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800404 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700405 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700407 if ((dev->flags & IFF_LOOPBACK) &&
408 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
409 return 1;
410 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411}
412
Dave Jonesb6f99a22007-03-22 12:27:49 -0700413static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414{
David S. Miller69cce1d2011-07-17 23:09:49 -0700415 struct neighbour *neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800416 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700417 if (rt->rt6i_flags & RTF_NONEXTHOP ||
418 !(rt->rt6i_flags & RTF_GATEWAY))
419 m = 1;
420 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800421 read_lock_bh(&neigh->lock);
422 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700423 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800424#ifdef CONFIG_IPV6_ROUTER_PREF
425 else if (neigh->nud_state & NUD_FAILED)
426 m = 0;
427#endif
428 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800429 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800430 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800431 } else
432 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433 return m;
434}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800436static int rt6_score_route(struct rt6_info *rt, int oif,
437 int strict)
438{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700439 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900440
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700441 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700442 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800443 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800444#ifdef CONFIG_IPV6_ROUTER_PREF
445 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
446#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700447 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800448 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800449 return -1;
450 return m;
451}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
David S. Millerf11e6652007-03-24 20:36:25 -0700453static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
454 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800455{
David S. Millerf11e6652007-03-24 20:36:25 -0700456 int m;
457
458 if (rt6_check_expired(rt))
459 goto out;
460
461 m = rt6_score_route(rt, oif, strict);
462 if (m < 0)
463 goto out;
464
465 if (m > *mpri) {
466 if (strict & RT6_LOOKUP_F_REACHABLE)
467 rt6_probe(match);
468 *mpri = m;
469 match = rt;
470 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
471 rt6_probe(rt);
472 }
473
474out:
475 return match;
476}
477
478static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
479 struct rt6_info *rr_head,
480 u32 metric, int oif, int strict)
481{
482 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800483 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484
David S. Millerf11e6652007-03-24 20:36:25 -0700485 match = NULL;
486 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700487 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700488 match = find_match(rt, oif, strict, &mpri, match);
489 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700490 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700491 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800492
David S. Millerf11e6652007-03-24 20:36:25 -0700493 return match;
494}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800495
David S. Millerf11e6652007-03-24 20:36:25 -0700496static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
497{
498 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800499 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
David S. Millerf11e6652007-03-24 20:36:25 -0700501 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800502 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
David S. Millerf11e6652007-03-24 20:36:25 -0700504 rt0 = fn->rr_ptr;
505 if (!rt0)
506 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507
David S. Millerf11e6652007-03-24 20:36:25 -0700508 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800510 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700511 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700512 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700513
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800514 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700515 if (!next || next->rt6i_metric != rt0->rt6i_metric)
516 next = fn->leaf;
517
518 if (next != rt0)
519 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 }
521
David S. Millerf11e6652007-03-24 20:36:25 -0700522 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800523 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900525 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000526 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527}
528
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800529#ifdef CONFIG_IPV6_ROUTE_INFO
530int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000531 const struct in6_addr *gwaddr)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800532{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900533 struct net *net = dev_net(dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800534 struct route_info *rinfo = (struct route_info *) opt;
535 struct in6_addr prefix_buf, *prefix;
536 unsigned int pref;
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900537 unsigned long lifetime;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800538 struct rt6_info *rt;
539
540 if (len < sizeof(struct route_info)) {
541 return -EINVAL;
542 }
543
544 /* Sanity check for prefix_len and length */
545 if (rinfo->length > 3) {
546 return -EINVAL;
547 } else if (rinfo->prefix_len > 128) {
548 return -EINVAL;
549 } else if (rinfo->prefix_len > 64) {
550 if (rinfo->length < 2) {
551 return -EINVAL;
552 }
553 } else if (rinfo->prefix_len > 0) {
554 if (rinfo->length < 1) {
555 return -EINVAL;
556 }
557 }
558
559 pref = rinfo->route_pref;
560 if (pref == ICMPV6_ROUTER_PREF_INVALID)
Jens Rosenboom3933fc92009-09-10 06:25:11 +0000561 return -EINVAL;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800562
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900563 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800564
565 if (rinfo->length == 3)
566 prefix = (struct in6_addr *)rinfo->prefix;
567 else {
568 /* this function is safe */
569 ipv6_addr_prefix(&prefix_buf,
570 (struct in6_addr *)rinfo->prefix,
571 rinfo->prefix_len);
572 prefix = &prefix_buf;
573 }
574
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800575 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
576 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800577
578 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700579 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800580 rt = NULL;
581 }
582
583 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800584 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800585 pref);
586 else if (rt)
587 rt->rt6i_flags = RTF_ROUTEINFO |
588 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
589
590 if (rt) {
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900591 if (!addrconf_finite_timeout(lifetime)) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800592 rt->rt6i_flags &= ~RTF_EXPIRES;
593 } else {
594 rt->rt6i_expires = jiffies + HZ * lifetime;
595 rt->rt6i_flags |= RTF_EXPIRES;
596 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700597 dst_release(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800598 }
599 return 0;
600}
601#endif
602
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup further up the tree.
 *
 * Must be expanded inside a function that provides:
 *   - a local 'struct rt6_info *rt' holding the current lookup result,
 *   - a local 'struct fib6_node *fn' holding the current node,
 *   - a label 'restart' that redoes the selection on 'fn',
 *   - a label 'out' that finishes the lookup with the current 'rt'.
 *
 * Nothing happens unless 'rt' is the namespace's null entry.  Otherwise
 * the macro walks towards the root: at each step it moves to the parent,
 * or, when the parent has a subtree that is not the node we came from,
 * looks @saddr up in that subtree.  It jumps to 'restart' at the first
 * node flagged RTN_RTINFO and to 'out' when the current node is flagged
 * RTN_TL_ROOT.
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700620
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800621static struct rt6_info *ip6_pol_route_lookup(struct net *net,
622 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500623 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624{
625 struct fib6_node *fn;
626 struct rt6_info *rt;
627
Thomas Grafc71099a2006-08-04 23:20:06 -0700628 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500629 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700630restart:
631 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500632 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
633 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700634out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700635 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700636 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700637 return rt;
638
639}
640
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900641struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
642 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700643{
David S. Miller4c9483b2011-03-12 16:22:43 -0500644 struct flowi6 fl6 = {
645 .flowi6_oif = oif,
646 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700647 };
648 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700649 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700650
Thomas Grafadaa70b2006-10-13 15:01:03 -0700651 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500652 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700653 flags |= RT6_LOOKUP_F_HAS_SADDR;
654 }
655
David S. Miller4c9483b2011-03-12 16:22:43 -0500656 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700657 if (dst->error == 0)
658 return (struct rt6_info *) dst;
659
660 dst_release(dst);
661
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 return NULL;
663}
664
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900665EXPORT_SYMBOL(rt6_lookup);
666
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
672
Thomas Graf86872cb2006-08-22 00:01:08 -0700673static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674{
675 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700676 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
Thomas Grafc71099a2006-08-04 23:20:06 -0700678 table = rt->rt6i_table;
679 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700680 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 return err;
684}
685
Thomas Graf40e22e82006-08-22 00:00:45 -0700686int ip6_ins_rt(struct rt6_info *rt)
687{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800688 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900689 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800690 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800691 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700692}
693
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000694static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
695 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000696 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 struct rt6_info *rt;
699
700 /*
701 * Clone the route.
702 */
703
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000704 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
706 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800707 struct neighbour *neigh;
708 int attempts = !in_softirq();
709
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900710 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
711 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000712 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900713 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900715 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
717 rt->rt6i_dst.plen = 128;
718 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700719 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721#ifdef CONFIG_IPV6_SUBTREES
722 if (rt->rt6i_src.plen && saddr) {
723 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
724 rt->rt6i_src.plen = 128;
725 }
726#endif
727
David S. Miller14deae42009-01-04 16:04:39 -0800728 retry:
729 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
730 if (IS_ERR(neigh)) {
731 struct net *net = dev_net(rt->rt6i_dev);
732 int saved_rt_min_interval =
733 net->ipv6.sysctl.ip6_rt_gc_min_interval;
734 int saved_rt_elasticity =
735 net->ipv6.sysctl.ip6_rt_gc_elasticity;
736
737 if (attempts-- > 0) {
738 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
739 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
740
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000741 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800742
743 net->ipv6.sysctl.ip6_rt_gc_elasticity =
744 saved_rt_elasticity;
745 net->ipv6.sysctl.ip6_rt_gc_min_interval =
746 saved_rt_min_interval;
747 goto retry;
748 }
749
750 if (net_ratelimit())
751 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700752 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700753 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800754 return NULL;
755 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700756 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800758 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800760 return rt;
761}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000763static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
764 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800765{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000766 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
767
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800768 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800769 rt->rt6i_dst.plen = 128;
770 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700771 rt->dst.flags |= DST_HOST;
David S. Miller69cce1d2011-07-17 23:09:49 -0700772 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800773 }
774 return rt;
775}
776
/*
 * Look up a route for @fl6 in @table, restricted by @oif and @flags.
 *
 * If the selected route is the null entry or already an RTF_CACHE entry it
 * is returned as is.  Otherwise a /128 copy is created with rt6_alloc_cow()
 * (no neighbour bound and RTF_NONEXTHOP not set) or rt6_alloc_clone() (route
 * is not DST_HOST) and inserted with ip6_ins_rt(); a DST_HOST route that
 * needs neither is returned directly.
 *
 * The returned route has had dst_hold() called on it, and its lastuse and
 * __use fields are updated before returning.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this block; the
 * `restart` and `out` labels are presumably jump targets of that macro and it
 * presumably reads/writes the locals `fn` and `rt` - confirm before renaming
 * any of them.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800777static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500778 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779{
780 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800781 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700782 int strict = 0;
	/* Upper bound on the allocate/insert/relookup cycles below. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800784 int err;
	/* RT6_LOOKUP_F_REACHABLE is only requested when forwarding is off in devconf_all. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700785 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700787 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788
789relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700790 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800792restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500793 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794
795restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700796 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800797
David S. Miller4c9483b2011-03-12 16:22:43 -0500798 BACKTRACK(net, &fl6->saddr);
	/* Null entry or an existing cache entry: nothing to clone. */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800799 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800800 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800801 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
	/* Pin the route before dropping the table lock; the copy is made unlocked. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700803 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700804 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800805
David S. Miller69cce1d2011-07-17 23:09:49 -0700806 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500807 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800808 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500809 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800810 else
811 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800812
	/* Swap the reference from the original route to the copy (or the null entry if the copy failed). */
Changli Gaod8d1f302010-06-10 23:31:35 -0700813 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800814 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800815
Changli Gaod8d1f302010-06-10 23:31:35 -0700816 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800817 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700818 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800819 if (!err)
820 goto out2;
821 }
822
	/* Out of attempts: return whatever rt currently points at. */
823 if (--attempts <= 0)
824 goto out2;
825
826 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700827 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800828 * released someone could insert this route. Relookup.
829 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700830 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800831 goto relookup;
832
833out:
	/* First pass asked for reachable routers only; retry once without that restriction. */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800834 if (reachable) {
835 reachable = 0;
836 goto restart_2;
837 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700838 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700839 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700841 rt->dst.lastuse = jiffies;
842 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700843
844 return rt;
845}
846
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800847static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500848 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700849{
David S. Miller4c9483b2011-03-12 16:22:43 -0500850 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700851}
852
Thomas Grafc71099a2006-08-04 23:20:06 -0700853void ip6_route_input(struct sk_buff *skb)
854{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000855 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900856 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700857 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500858 struct flowi6 fl6 = {
859 .flowi6_iif = skb->dev->ifindex,
860 .daddr = iph->daddr,
861 .saddr = iph->saddr,
862 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
863 .flowi6_mark = skb->mark,
864 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700865 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700866
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800867 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700868 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700869
David S. Miller4c9483b2011-03-12 16:22:43 -0500870 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700871}
872
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800873static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500874 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700875{
David S. Miller4c9483b2011-03-12 16:22:43 -0500876 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700877}
878
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700879struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500880 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700881{
882 int flags = 0;
883
David S. Miller4c9483b2011-03-12 16:22:43 -0500884 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700885 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700886
David S. Miller4c9483b2011-03-12 16:22:43 -0500887 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700888 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000889 else if (sk)
890 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700891
David S. Miller4c9483b2011-03-12 16:22:43 -0500892 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893}
894
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900895EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896
David S. Miller2774c132011-03-01 14:59:04 -0800897struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700898{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700899 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700900 struct dst_entry *new = NULL;
901
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700902 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700903 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700904 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
905
Changli Gaod8d1f302010-06-10 23:31:35 -0700906 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700907
David S. Miller14e50e52007-05-24 18:17:54 -0700908 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800909 new->input = dst_discard;
910 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700911
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000912 if (dst_metrics_read_only(&ort->dst))
913 new->_metrics = ort->dst._metrics;
914 else
915 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700916 rt->rt6i_idev = ort->rt6i_idev;
917 if (rt->rt6i_idev)
918 in6_dev_hold(rt->rt6i_idev);
919 rt->rt6i_expires = 0;
920
921 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
922 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
923 rt->rt6i_metric = 0;
924
925 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
926#ifdef CONFIG_IPV6_SUBTREES
927 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
928#endif
929
930 dst_free(new);
931 }
932
David S. Miller69ead7a2011-03-01 14:45:33 -0800933 dst_release(dst_orig);
934 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700935}
David S. Miller14e50e52007-05-24 18:17:54 -0700936
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937/*
938 * Destination cache support functions
939 */
940
941static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
942{
943 struct rt6_info *rt;
944
945 rt = (struct rt6_info *) dst;
946
David S. Miller6431cbc2011-02-07 20:38:06 -0800947 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
948 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
949 if (!rt->rt6i_peer)
950 rt6_bind_peer(rt, 0);
951 rt->rt6i_peer_genid = rt6_peer_genid();
952 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800954 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 return NULL;
956}
957
958static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
959{
960 struct rt6_info *rt = (struct rt6_info *) dst;
961
962 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000963 if (rt->rt6i_flags & RTF_CACHE) {
964 if (rt6_check_expired(rt)) {
965 ip6_del_rt(rt);
966 dst = NULL;
967 }
968 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000970 dst = NULL;
971 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000973 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974}
975
976static void ip6_link_failure(struct sk_buff *skb)
977{
978 struct rt6_info *rt;
979
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000980 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981
Eric Dumazetadf30902009-06-02 05:19:30 +0000982 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if (rt) {
984 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700985 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 rt->rt6i_flags |= RTF_EXPIRES;
987 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
988 rt->rt6i_node->fn_sernum = -1;
989 }
990}
991
992static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
993{
994 struct rt6_info *rt6 = (struct rt6_info*)dst;
995
996 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
997 rt6->rt6i_flags |= RTF_MODIFIED;
998 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -0800999 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001001 features |= RTAX_FEATURE_ALLFRAG;
1002 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 }
David S. Millerdefb3512010-12-08 21:16:57 -08001004 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 }
1006}
1007
David S. Miller0dbaee32010-12-13 12:52:14 -08001008static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009{
David S. Miller0dbaee32010-12-13 12:52:14 -08001010 struct net_device *dev = dst->dev;
1011 unsigned int mtu = dst_mtu(dst);
1012 struct net *net = dev_net(dev);
1013
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1015
Daniel Lezcano55786892008-03-04 13:47:47 -08001016 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1017 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018
1019 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001020 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1021 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1022 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 * rely only on pmtu discovery"
1024 */
1025 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1026 mtu = IPV6_MAXPLEN;
1027 return mtu;
1028}
1029
David S. Millerd33e4552010-12-14 13:01:14 -08001030static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1031{
1032 unsigned int mtu = IPV6_MIN_MTU;
1033 struct inet6_dev *idev;
1034
1035 rcu_read_lock();
1036 idev = __in6_dev_get(dst->dev);
1037 if (idev)
1038 mtu = idev->cnf.mtu6;
1039 rcu_read_unlock();
1040
1041 return mtu;
1042}
1043
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001044static struct dst_entry *icmp6_dst_gc_list;
1045static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001046
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001047struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001049 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050{
1051 struct rt6_info *rt;
1052 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001053 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054
1055 if (unlikely(idev == NULL))
1056 return NULL;
1057
David S. Miller957c6652011-06-24 15:25:00 -07001058 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 if (unlikely(rt == NULL)) {
1060 in6_dev_put(idev);
1061 goto out;
1062 }
1063
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 if (neigh)
1065 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001066 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001068 if (IS_ERR(neigh))
1069 neigh = NULL;
1070 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 rt->rt6i_idev = idev;
David S. Miller69cce1d2011-07-17 23:09:49 -07001073 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001074 atomic_set(&rt->dst.__refcnt, 1);
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001075 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
David S. Millerdefb3512010-12-08 21:16:57 -08001076 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001077 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001079 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001080 rt->dst.next = icmp6_dst_gc_list;
1081 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001082 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
Daniel Lezcano55786892008-03-04 13:47:47 -08001084 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
1086out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001087 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088}
1089
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001090int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001092 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001093 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001095 spin_lock_bh(&icmp6_dst_lock);
1096 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001097
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 while ((dst = *pprev) != NULL) {
1099 if (!atomic_read(&dst->__refcnt)) {
1100 *pprev = dst->next;
1101 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 } else {
1103 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001104 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 }
1106 }
1107
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001108 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001109
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001110 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111}
1112
David S. Miller1e493d12008-09-10 17:27:15 -07001113static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1114 void *arg)
1115{
1116 struct dst_entry *dst, **pprev;
1117
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
1120 while ((dst = *pprev) != NULL) {
1121 struct rt6_info *rt = (struct rt6_info *) dst;
1122 if (func(rt, arg)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1125 } else {
1126 pprev = &dst->next;
1127 }
1128 }
1129 spin_unlock_bh(&icmp6_dst_lock);
1130}
1131
Daniel Lezcano569d3642008-01-18 03:56:57 -08001132static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001135 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001136 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1137 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1138 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1139 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1140 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001141 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142
Eric Dumazetfc66f952010-10-08 06:37:34 +00001143 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001144 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001145 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 goto out;
1147
Benjamin Thery6891a342008-03-04 13:49:47 -08001148 net->ipv6.ip6_rt_gc_expire++;
1149 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1150 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001151 entries = dst_entries_get_slow(ops);
1152 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001153 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001155 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001156 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157}
1158
1159/* Clean host part of a prefix. Not necessary in radix tree,
1160 but results in cleaner routing tables.
1161
1162 Remove it only when all the things will work!
1163 */
1164
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001165int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166{
David S. Miller5170ae82010-12-12 21:35:57 -08001167 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001168 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001169 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001170 struct inet6_dev *idev;
1171
1172 rcu_read_lock();
1173 idev = __in6_dev_get(dev);
1174 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001175 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001176 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001177 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001178 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 }
1180 return hoplimit;
1181}
David S. Millerabbf46a2010-12-12 21:14:46 -08001182EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
1184/*
1185 *
1186 */
1187
/*
 * Build a route from @cfg and insert it into the FIB table cfg->fc_table.
 *
 * @cfg is modified in place: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net is set
 * to the namespace of the device finally chosen.
 *
 * Returns the result of __ip6_ins_rt() once the route has been built, or a
 * negative errno if validation or allocation fails first.  On those early
 * failures (label `out`) the references taken on dev/idev are dropped and a
 * partially built route is freed.  On the insert path the dev/idev
 * references are not dropped here; the pointers are stored in the route.
 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001188int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189{
1190 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001191 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 struct rt6_info *rt = NULL;
1193 struct net_device *dev = NULL;
1194 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001195 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 int addr_type;
1197
	/* Prefix lengths beyond 128 bits are invalid; source prefixes need CONFIG_IPV6_SUBTREES. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001198 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 return -EINVAL;
1200#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001201 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 return -EINVAL;
1203#endif
	/* An explicit interface index must resolve to a device that has an inet6_dev. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001204 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001206 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 if (!dev)
1208 goto out;
1209 idev = in6_dev_get(dev);
1210 if (!idev)
1211 goto out;
1212 }
1213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 if (cfg->fc_metric == 0)
1215 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216
Daniel Lezcano55786892008-03-04 13:47:47 -08001217 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001218 if (table == NULL) {
1219 err = -ENOBUFS;
1220 goto out;
1221 }
1222
David S. Miller957c6652011-06-24 15:25:00 -07001223 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224
1225 if (rt == NULL) {
1226 err = -ENOMEM;
1227 goto out;
1228 }
1229
Changli Gaod8d1f302010-06-10 23:31:35 -07001230 rt->dst.obsolete = -1;
	/* fc_expires is given in clock_t units and only honoured with RTF_EXPIRES. */
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001231 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1232 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1233 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Thomas Graf86872cb2006-08-22 00:01:08 -07001235 if (cfg->fc_protocol == RTPROT_UNSPEC)
1236 cfg->fc_protocol = RTPROT_BOOT;
1237 rt->rt6i_protocol = cfg->fc_protocol;
1238
1239 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
	/* Input handler: multicast destination, local delivery (RTF_LOCAL), or forwarding. */
1241 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001242 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001243 else if (cfg->fc_flags & RTF_LOCAL)
1244 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001246 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247
Changli Gaod8d1f302010-06-10 23:31:35 -07001248 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1251 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001253 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
1255#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001256 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1257 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258#endif
1259
Thomas Graf86872cb2006-08-22 00:01:08 -07001260 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
1262 /* We cannot add true routes via loopback here,
1263 they would result in kernel looping; promote them to reject routes
1264 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001266 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1267 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001269 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 if (dev) {
1271 dev_put(dev);
1272 in6_dev_put(idev);
1273 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001274 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 dev_hold(dev);
1276 idev = in6_dev_get(dev);
1277 if (!idev) {
1278 err = -ENODEV;
1279 goto out;
1280 }
1281 }
	/* Reject route: discard handlers, -ENETUNREACH, and only these two flags (fc_flags is not copied). */
Changli Gaod8d1f302010-06-10 23:31:35 -07001282 rt->dst.output = ip6_pkt_discard_out;
1283 rt->dst.input = ip6_pkt_discard;
1284 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1286 goto install_route;
1287 }
1288
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001290 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 int gwa_type;
1292
Thomas Graf86872cb2006-08-22 00:01:08 -07001293 gw_addr = &cfg->fc_gateway;
1294 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 gwa_type = ipv6_addr_type(gw_addr);
1296
1297 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1298 struct rt6_info *grt;
1299
1300 /* IPv6 strictly inhibits using not link-local
1301 addresses as nexthop address.
1302 Otherwise, router will not able to send redirects.
1303 It is very good, but in some (rare!) circumstances
1304 (SIT, PtP, NBMA NOARP links) it is handy to allow
1305 some exceptions. --ANK
1306 */
1307 err = -EINVAL;
1308 if (!(gwa_type&IPV6_ADDR_UNICAST))
1309 goto out;
1310
	/* The gateway itself must be reachable through an existing route. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001311 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
1313 err = -EHOSTUNREACH;
1314 if (grt == NULL)
1315 goto out;
1316 if (dev) {
1317 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001318 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 goto out;
1320 }
1321 } else {
	/* No device given: adopt the device of the route to the gateway, taking our own references. */
1322 dev = grt->rt6i_dev;
1323 idev = grt->rt6i_idev;
1324 dev_hold(dev);
1325 in6_dev_hold(grt->rt6i_idev);
1326 }
	/* err stays -EHOSTUNREACH when the route to the gateway is itself a gateway route. */
1327 if (!(grt->rt6i_flags&RTF_GATEWAY))
1328 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001329 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 if (err)
1332 goto out;
1333 }
1334 err = -EINVAL;
1335 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1336 goto out;
1337 }
1338
1339 err = -ENODEV;
1340 if (dev == NULL)
1341 goto out;
1342
	/* A preferred source address, if given, must pass ipv6_chk_addr() for this device. */
Daniel Walterc3968a82011-04-13 21:10:57 +00001343 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1344 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1345 err = -EINVAL;
1346 goto out;
1347 }
1348 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1349 rt->rt6i_prefsrc.plen = 128;
1350 } else
1351 rt->rt6i_prefsrc.plen = 0;
1352
	/* Gateway and no-nexthop routes get a neighbour entry bound at creation time. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001353 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001354 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1355 if (IS_ERR(n)) {
1356 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 goto out;
1358 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001359 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 }
1361
Thomas Graf86872cb2006-08-22 00:01:08 -07001362 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
1364install_route:
	/* Apply metric attributes: type 0 is skipped, types above RTAX_MAX are rejected. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001365 if (cfg->fc_mx) {
1366 struct nlattr *nla;
1367 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368
Thomas Graf86872cb2006-08-22 00:01:08 -07001369 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001370 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001371
1372 if (type) {
1373 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 err = -EINVAL;
1375 goto out;
1376 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001377
David S. Millerdefb3512010-12-08 21:16:57 -08001378 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 }
1381 }
1382
	/* The dev/idev pointers (and the references held on them) are stored in the route from here on. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001383 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001385 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001386
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001387 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001388
	/* NOTE(review): no cleanup is done here if the insert fails; presumably fib6_add() frees the route - confirm. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001389 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
	/* Early-failure exit: undo whatever was acquired before the error. */
1391out:
1392 if (dev)
1393 dev_put(dev);
1394 if (idev)
1395 in6_dev_put(idev);
1396 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001397 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 return err;
1399}
1400
Thomas Graf86872cb2006-08-22 00:01:08 -07001401static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402{
1403 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001404 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001405 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001407 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001408 return -ENOENT;
1409
Thomas Grafc71099a2006-08-04 23:20:06 -07001410 table = rt->rt6i_table;
1411 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412
Thomas Graf86872cb2006-08-22 00:01:08 -07001413 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001414 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
Thomas Grafc71099a2006-08-04 23:20:06 -07001416 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417
1418 return err;
1419}
1420
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001421int ip6_del_rt(struct rt6_info *rt)
1422{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001423 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001424 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001425 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001426 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001427}
1428
Thomas Graf86872cb2006-08-22 00:01:08 -07001429static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430{
Thomas Grafc71099a2006-08-04 23:20:06 -07001431 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 struct fib6_node *fn;
1433 struct rt6_info *rt;
1434 int err = -ESRCH;
1435
Daniel Lezcano55786892008-03-04 13:47:47 -08001436 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001437 if (table == NULL)
1438 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
Thomas Grafc71099a2006-08-04 23:20:06 -07001440 read_lock_bh(&table->tb6_lock);
1441
1442 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001443 &cfg->fc_dst, cfg->fc_dst_len,
1444 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001445
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001447 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001448 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001450 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001452 if (cfg->fc_flags & RTF_GATEWAY &&
1453 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001455 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001457 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001458 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459
Thomas Graf86872cb2006-08-22 00:01:08 -07001460 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001461 }
1462 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001463 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464
1465 return err;
1466}
1467
1468/*
1469 * Handle redirects
1470 */
/*
 * Lookup key for redirect validation: a flow plus the gateway address that
 * candidate routes are compared against.  fl6 must stay the first member:
 * __ip6_route_redirect() receives a struct flowi6 pointer and casts it back
 * to a struct ip6rd_flowi pointer.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001476static struct rt6_info *__ip6_route_redirect(struct net *net,
1477 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001478 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479 int flags)
1480{
David S. Miller4c9483b2011-03-12 16:22:43 -05001481 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001482 struct rt6_info *rt;
1483 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001484
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001486 * Get the "current" route for this destination and
1487 * check if the redirect has come from approriate router.
1488 *
1489 * RFC 2461 specifies that redirects should only be
1490 * accepted if they come from the nexthop to the target.
1491 * Due to the way the routes are chosen, this notion
1492 * is a bit fuzzy and one might need to check all possible
1493 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495
Thomas Grafc71099a2006-08-04 23:20:06 -07001496 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001497 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001498restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001499 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001500 /*
1501 * Current route is on-link; redirect is always invalid.
1502 *
1503 * Seems, previous statement is not true. It could
1504 * be node, which looks for us as on-link (f.e. proxy ndisc)
1505 * But then router serving it might decide, that we should
1506 * know truth 8)8) --ANK (980726).
1507 */
1508 if (rt6_check_expired(rt))
1509 continue;
1510 if (!(rt->rt6i_flags & RTF_GATEWAY))
1511 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001512 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001513 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001514 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001515 continue;
1516 break;
1517 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001518
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001519 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001520 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001521 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001522out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001523 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001524
1525 read_unlock_bh(&table->tb6_lock);
1526
1527 return rt;
1528};
1529
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001530static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1531 const struct in6_addr *src,
1532 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001533 struct net_device *dev)
1534{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001535 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001536 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001537 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001538 .fl6 = {
1539 .flowi6_oif = dev->ifindex,
1540 .daddr = *dest,
1541 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001542 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001543 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001544
Brian Haley86c36ce2009-10-07 13:58:01 -07001545 ipv6_addr_copy(&rdfl.gateway, gateway);
1546
Thomas Grafadaa70b2006-10-13 15:01:03 -07001547 if (rt6_need_strict(dest))
1548 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001549
David S. Miller4c9483b2011-03-12 16:22:43 -05001550 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001551 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001552}
1553
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001554void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1555 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001556 struct neighbour *neigh, u8 *lladdr, int on_link)
1557{
1558 struct rt6_info *rt, *nrt = NULL;
1559 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001560 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001561
1562 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1563
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001564 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 if (net_ratelimit())
1566 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1567 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001568 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 }
1570
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 /*
1572 * We have finally decided to accept it.
1573 */
1574
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001575 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1577 NEIGH_UPDATE_F_OVERRIDE|
1578 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1579 NEIGH_UPDATE_F_ISROUTER))
1580 );
1581
1582 /*
1583 * Redirect received -> path was valid.
1584 * Look, redirects are sent only in response to data packets,
1585 * so that this nexthop apparently is reachable. --ANK
1586 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001587 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588
1589 /* Duplicate redirect: silently ignore. */
David S. Miller69cce1d2011-07-17 23:09:49 -07001590 if (neigh == dst_get_neighbour(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 goto out;
1592
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001593 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 if (nrt == NULL)
1595 goto out;
1596
1597 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1598 if (on_link)
1599 nrt->rt6i_flags &= ~RTF_GATEWAY;
1600
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001602 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603
1604 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001605 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606
Thomas Graf40e22e82006-08-22 00:00:45 -07001607 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 goto out;
1609
Changli Gaod8d1f302010-06-10 23:31:35 -07001610 netevent.old = &rt->dst;
1611 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001612 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1613
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001615 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 return;
1617 }
1618
1619out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001620 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621}
1622
1623/*
1624 * Handle ICMP "packet too big" messages
1625 * i.e. Path MTU discovery
1626 */
1627
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001628static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001629 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630{
1631 struct rt6_info *rt, *nrt;
1632 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001633again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001634 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 if (rt == NULL)
1636 return;
1637
Andrey Vagind3052b52010-12-11 15:20:11 +00001638 if (rt6_check_expired(rt)) {
1639 ip6_del_rt(rt);
1640 goto again;
1641 }
1642
Changli Gaod8d1f302010-06-10 23:31:35 -07001643 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 goto out;
1645
1646 if (pmtu < IPV6_MIN_MTU) {
1647 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001648 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 * MTU (1280) and a fragment header should always be included
1650 * after a node receiving Too Big message reporting PMTU is
1651 * less than the IPv6 Minimum Link MTU.
1652 */
1653 pmtu = IPV6_MIN_MTU;
1654 allfrag = 1;
1655 }
1656
1657 /* New mtu received -> path was valid.
1658 They are sent only in response to data packets,
1659 so that this nexthop apparently is reachable. --ANK
1660 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001661 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662
1663 /* Host route. If it is static, it would be better
1664 not to override it, but add new one, so that
1665 when cache entry will expire old pmtu
1666 would return automatically.
1667 */
1668 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001669 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1670 if (allfrag) {
1671 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1672 features |= RTAX_FEATURE_ALLFRAG;
1673 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1674 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001675 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1677 goto out;
1678 }
1679
1680 /* Network route.
1681 Two cases are possible:
1682 1. It is connected route. Action: COW
1683 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1684 */
David S. Miller69cce1d2011-07-17 23:09:49 -07001685 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001686 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001687 else
1688 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001689
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001690 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001691 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1692 if (allfrag) {
1693 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1694 features |= RTAX_FEATURE_ALLFRAG;
1695 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1696 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001697
1698 /* According to RFC 1981, detecting PMTU increase shouldn't be
1699 * happened within 5 mins, the recommended timer is 10 mins.
1700 * Here this route expiration time is set to ip6_rt_mtu_expires
1701 * which is 10 mins. After 10 mins the decreased pmtu is expired
1702 * and detecting PMTU increase will be automatically happened.
1703 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001704 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001705 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1706
Thomas Graf40e22e82006-08-22 00:00:45 -07001707 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001710 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711}
1712
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001713void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001714 struct net_device *dev, u32 pmtu)
1715{
1716 struct net *net = dev_net(dev);
1717
1718 /*
1719 * RFC 1981 states that a node "MUST reduce the size of the packets it
1720 * is sending along the path" that caused the Packet Too Big message.
1721 * Since it's not possible in the general case to determine which
1722 * interface was used to send the original packet, we update the MTU
1723 * on the interface that will be used to send future packets. We also
1724 * update the MTU on the interface that received the Packet Too Big in
1725 * case the original packet was forced out that interface with
1726 * SO_BINDTODEVICE or similar. This is the next best thing to the
1727 * correct behaviour, which would be to update the MTU on all
1728 * interfaces.
1729 */
1730 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1731 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1732}
1733
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734/*
1735 * Misc support functions
1736 */
1737
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001738static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1739 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001741 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001742 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001743 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
1745 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001746 rt->dst.input = ort->dst.input;
1747 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001749 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1750 rt->rt6i_dst.plen = ort->rt6i_dst.plen;
David S. Millerdefb3512010-12-08 21:16:57 -08001751 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001752 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 rt->rt6i_idev = ort->rt6i_idev;
1754 if (rt->rt6i_idev)
1755 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001756 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 rt->rt6i_expires = 0;
1758
1759 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1760 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1761 rt->rt6i_metric = 0;
1762
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763#ifdef CONFIG_IPV6_SUBTREES
1764 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1765#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001766 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001767 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 }
1769 return rt;
1770}
1771
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001772#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001773static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001774 const struct in6_addr *prefix, int prefixlen,
1775 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001776{
1777 struct fib6_node *fn;
1778 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001779 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001780
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001781 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001782 if (table == NULL)
1783 return NULL;
1784
1785 write_lock_bh(&table->tb6_lock);
1786 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001787 if (!fn)
1788 goto out;
1789
Changli Gaod8d1f302010-06-10 23:31:35 -07001790 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001791 if (rt->rt6i_dev->ifindex != ifindex)
1792 continue;
1793 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1794 continue;
1795 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1796 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001797 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001798 break;
1799 }
1800out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001801 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001802 return rt;
1803}
1804
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001805static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001806 const struct in6_addr *prefix, int prefixlen,
1807 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001808 unsigned pref)
1809{
Thomas Graf86872cb2006-08-22 00:01:08 -07001810 struct fib6_config cfg = {
1811 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001812 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001813 .fc_ifindex = ifindex,
1814 .fc_dst_len = prefixlen,
1815 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1816 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001817 .fc_nlinfo.pid = 0,
1818 .fc_nlinfo.nlh = NULL,
1819 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001820 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001821
Thomas Graf86872cb2006-08-22 00:01:08 -07001822 ipv6_addr_copy(&cfg.fc_dst, prefix);
1823 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1824
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001825 /* We should treat it as a default route if prefix length is 0. */
1826 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001827 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001828
Thomas Graf86872cb2006-08-22 00:01:08 -07001829 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001830
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001831 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001832}
1833#endif
1834
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001835struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001836{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001838 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001840 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001841 if (table == NULL)
1842 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
Thomas Grafc71099a2006-08-04 23:20:06 -07001844 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001845 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001847 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1849 break;
1850 }
1851 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001852 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001853 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 return rt;
1855}
1856
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001857struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001858 struct net_device *dev,
1859 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860{
Thomas Graf86872cb2006-08-22 00:01:08 -07001861 struct fib6_config cfg = {
1862 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001863 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001864 .fc_ifindex = dev->ifindex,
1865 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1866 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001867 .fc_nlinfo.pid = 0,
1868 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001869 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001870 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871
Thomas Graf86872cb2006-08-22 00:01:08 -07001872 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873
Thomas Graf86872cb2006-08-22 00:01:08 -07001874 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 return rt6_get_dflt_router(gwaddr, dev);
1877}
1878
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001879void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880{
1881 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001882 struct fib6_table *table;
1883
1884 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001885 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001886 if (table == NULL)
1887 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888
1889restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001890 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001891 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001893 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001894 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001895 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 goto restart;
1897 }
1898 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001899 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900}
1901
Daniel Lezcano55786892008-03-04 13:47:47 -08001902static void rtmsg_to_fib6_config(struct net *net,
1903 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001904 struct fib6_config *cfg)
1905{
1906 memset(cfg, 0, sizeof(*cfg));
1907
1908 cfg->fc_table = RT6_TABLE_MAIN;
1909 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1910 cfg->fc_metric = rtmsg->rtmsg_metric;
1911 cfg->fc_expires = rtmsg->rtmsg_info;
1912 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1913 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1914 cfg->fc_flags = rtmsg->rtmsg_flags;
1915
Daniel Lezcano55786892008-03-04 13:47:47 -08001916 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001917
Thomas Graf86872cb2006-08-22 00:01:08 -07001918 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1919 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1920 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1921}
1922
Daniel Lezcano55786892008-03-04 13:47:47 -08001923int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924{
Thomas Graf86872cb2006-08-22 00:01:08 -07001925 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926 struct in6_rtmsg rtmsg;
1927 int err;
1928
1929 switch(cmd) {
1930 case SIOCADDRT: /* Add a route */
1931 case SIOCDELRT: /* Delete a route */
1932 if (!capable(CAP_NET_ADMIN))
1933 return -EPERM;
1934 err = copy_from_user(&rtmsg, arg,
1935 sizeof(struct in6_rtmsg));
1936 if (err)
1937 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001938
Daniel Lezcano55786892008-03-04 13:47:47 -08001939 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001940
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 rtnl_lock();
1942 switch (cmd) {
1943 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001944 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 break;
1946 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001947 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 break;
1949 default:
1950 err = -EINVAL;
1951 }
1952 rtnl_unlock();
1953
1954 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
1957 return -EINVAL;
1958}
1959
1960/*
1961 * Drop the packet on the floor
1962 */
1963
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001964static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001966 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001967 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001968 switch (ipstats_mib_noroutes) {
1969 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001970 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001971 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001972 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1973 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001974 break;
1975 }
1976 /* FALLTHROUGH */
1977 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001978 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1979 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001980 break;
1981 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001982 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 kfree_skb(skb);
1984 return 0;
1985}
1986
/*
 * Drop @skb with ICMPv6 code ICMPV6_NOROUTE, accounted against
 * IPSTATS_MIB_INNOROUTES (see ip6_pkt_drop()).  Returns 0.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1991
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001992static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993{
Eric Dumazetadf30902009-06-02 05:19:30 +00001994 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001995 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996}
1997
David S. Miller6723ab52006-10-18 21:20:57 -07001998#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1999
/*
 * Drop @skb with ICMPv6 code ICMPV6_ADM_PROHIBITED, accounted against
 * IPSTATS_MIB_INNOROUTES (see ip6_pkt_drop()).  Returns 0.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2004
2005static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2006{
Eric Dumazetadf30902009-06-02 05:19:30 +00002007 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002008 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002009}
2010
David S. Miller6723ab52006-10-18 21:20:57 -07002011#endif
2012
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013/*
2014 * Allocate a dst for local (unicast / anycast) address.
2015 */
2016
2017struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2018 const struct in6_addr *addr,
2019 int anycast)
2020{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002021 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002022 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002023 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002024 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025
Ben Greear40385652010-11-08 12:33:48 +00002026 if (rt == NULL) {
2027 if (net_ratelimit())
2028 pr_warning("IPv6: Maximum number of routes reached,"
2029 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002031 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 in6_dev_hold(idev);
2034
David S. Miller11d53b42011-06-24 15:23:34 -07002035 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002036 rt->dst.input = ip6_input;
2037 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002038 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002039 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040
2041 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002042 if (anycast)
2043 rt->rt6i_flags |= RTF_ANYCAST;
2044 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002046 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2047 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002048 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002049
David S. Miller29546a62011-03-03 12:10:37 -08002050 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002052 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
2054 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2055 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002056 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057
Changli Gaod8d1f302010-06-10 23:31:35 -07002058 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059
2060 return rt;
2061}
2062
Daniel Walterc3968a82011-04-13 21:10:57 +00002063int ip6_route_get_saddr(struct net *net,
2064 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002065 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002066 unsigned int prefs,
2067 struct in6_addr *saddr)
2068{
2069 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2070 int err = 0;
2071 if (rt->rt6i_prefsrc.plen)
2072 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2073 else
2074 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2075 daddr, prefs, saddr);
2076 return err;
2077}
2078
/*
 * Walker argument used when a deleted address has to be removed from the
 * prefsrc of routes (see fib6_remove_prefsrc()).
 */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* restrict to this device; NULL = any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address being removed */
};
2085
2086static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2087{
2088 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2089 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2090 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2091
2092 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2093 rt != net->ipv6.ip6_null_entry &&
2094 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2095 /* remove prefsrc entry */
2096 rt->rt6i_prefsrc.plen = 0;
2097 }
2098 return 0;
2099}
2100
2101void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2102{
2103 struct net *net = dev_net(ifp->idev->dev);
2104 struct arg_dev_net_ip adni = {
2105 .dev = ifp->idev->dev,
2106 .net = net,
2107 .addr = &ifp->addr,
2108 };
2109 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2110}
2111
/* Walker argument for fib6_ifdown(): device filter plus owning namespace. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL = any */
	struct net		*net;	/* namespace whose null entry is kept */
};
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117static int fib6_ifdown(struct rt6_info *rt, void *arg)
2118{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002119 const struct arg_dev_net *adn = arg;
2120 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002121
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002122 if ((rt->rt6i_dev == dev || dev == NULL) &&
2123 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 RT6_TRACE("deleted by ifdown %p\n", rt);
2125 return -1;
2126 }
2127 return 0;
2128}
2129
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002130void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002132 struct arg_dev_net adn = {
2133 .dev = dev,
2134 .net = net,
2135 };
2136
2137 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002138 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139}
2140
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2146
2147static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2148{
2149 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2150 struct inet6_dev *idev;
2151
2152 /* In IPv6 pmtu discovery is not optional,
2153 so that RTAX_MTU lock cannot disable it.
2154 We still use this lock to block changes
2155 caused by addrconf/ndisc.
2156 */
2157
2158 idev = __in6_dev_get(arg->dev);
2159 if (idev == NULL)
2160 return 0;
2161
2162 /* For administrative MTU increase, there is no way to discover
2163 IPv6 PMTU increase, so PMTU increase should be updated here.
2164 Since RFC 1981 doesn't include administrative MTU increase
2165 update PMTU increase is a MUST. (i.e. jumbo frame)
2166 */
2167 /*
2168 If new MTU is less than route PMTU, this new MTU will be the
2169 lowest MTU in the path, update the route PMTU to reflect PMTU
2170 decreases; if new MTU is greater than route PMTU, and the
2171 old MTU is the lowest MTU in the path, update the route PMTU
2172 to reflect the increase. In this case if the other nodes' MTU
2173 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2174 PMTU discouvery.
2175 */
2176 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002177 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2178 (dst_mtu(&rt->dst) >= arg->mtu ||
2179 (dst_mtu(&rt->dst) < arg->mtu &&
2180 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002181 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002182 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183 return 0;
2184}
2185
2186void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2187{
Thomas Grafc71099a2006-08-04 23:20:06 -07002188 struct rt6_mtu_change_arg arg = {
2189 .dev = dev,
2190 .mtu = mtu,
2191 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002193 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194}
2195
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002196static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002197 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002198 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002199 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002200 [RTA_PRIORITY] = { .type = NLA_U32 },
2201 [RTA_METRICS] = { .type = NLA_NESTED },
2202};
2203
2204static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2205 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206{
Thomas Graf86872cb2006-08-22 00:01:08 -07002207 struct rtmsg *rtm;
2208 struct nlattr *tb[RTA_MAX+1];
2209 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
Thomas Graf86872cb2006-08-22 00:01:08 -07002211 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2212 if (err < 0)
2213 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214
Thomas Graf86872cb2006-08-22 00:01:08 -07002215 err = -EINVAL;
2216 rtm = nlmsg_data(nlh);
2217 memset(cfg, 0, sizeof(*cfg));
2218
2219 cfg->fc_table = rtm->rtm_table;
2220 cfg->fc_dst_len = rtm->rtm_dst_len;
2221 cfg->fc_src_len = rtm->rtm_src_len;
2222 cfg->fc_flags = RTF_UP;
2223 cfg->fc_protocol = rtm->rtm_protocol;
2224
2225 if (rtm->rtm_type == RTN_UNREACHABLE)
2226 cfg->fc_flags |= RTF_REJECT;
2227
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002228 if (rtm->rtm_type == RTN_LOCAL)
2229 cfg->fc_flags |= RTF_LOCAL;
2230
Thomas Graf86872cb2006-08-22 00:01:08 -07002231 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2232 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002233 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002234
2235 if (tb[RTA_GATEWAY]) {
2236 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2237 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002239
2240 if (tb[RTA_DST]) {
2241 int plen = (rtm->rtm_dst_len + 7) >> 3;
2242
2243 if (nla_len(tb[RTA_DST]) < plen)
2244 goto errout;
2245
2246 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002248
2249 if (tb[RTA_SRC]) {
2250 int plen = (rtm->rtm_src_len + 7) >> 3;
2251
2252 if (nla_len(tb[RTA_SRC]) < plen)
2253 goto errout;
2254
2255 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002257
Daniel Walterc3968a82011-04-13 21:10:57 +00002258 if (tb[RTA_PREFSRC])
2259 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2260
Thomas Graf86872cb2006-08-22 00:01:08 -07002261 if (tb[RTA_OIF])
2262 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2263
2264 if (tb[RTA_PRIORITY])
2265 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2266
2267 if (tb[RTA_METRICS]) {
2268 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2269 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002271
2272 if (tb[RTA_TABLE])
2273 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2274
2275 err = 0;
2276errout:
2277 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278}
2279
Thomas Grafc127ea22007-03-22 11:58:32 -07002280static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281{
Thomas Graf86872cb2006-08-22 00:01:08 -07002282 struct fib6_config cfg;
2283 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284
Thomas Graf86872cb2006-08-22 00:01:08 -07002285 err = rtm_to_fib6_config(skb, nlh, &cfg);
2286 if (err < 0)
2287 return err;
2288
2289 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290}
2291
Thomas Grafc127ea22007-03-22 11:58:32 -07002292static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293{
Thomas Graf86872cb2006-08-22 00:01:08 -07002294 struct fib6_config cfg;
2295 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296
Thomas Graf86872cb2006-08-22 00:01:08 -07002297 err = rtm_to_fib6_config(skb, nlh, &cfg);
2298 if (err < 0)
2299 return err;
2300
2301 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302}
2303
Thomas Graf339bf982006-11-10 14:10:15 -08002304static inline size_t rt6_nlmsg_size(void)
2305{
2306 return NLMSG_ALIGN(sizeof(struct rtmsg))
2307 + nla_total_size(16) /* RTA_SRC */
2308 + nla_total_size(16) /* RTA_DST */
2309 + nla_total_size(16) /* RTA_GATEWAY */
2310 + nla_total_size(16) /* RTA_PREFSRC */
2311 + nla_total_size(4) /* RTA_TABLE */
2312 + nla_total_size(4) /* RTA_IIF */
2313 + nla_total_size(4) /* RTA_OIF */
2314 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002315 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002316 + nla_total_size(sizeof(struct rta_cacheinfo));
2317}
2318
Brian Haley191cd582008-08-14 15:33:21 -07002319static int rt6_fill_node(struct net *net,
2320 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002321 struct in6_addr *dst, struct in6_addr *src,
2322 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002323 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324{
2325 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002326 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002327 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002328 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329
2330 if (prefix) { /* user wants prefix routes only */
2331 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2332 /* success since this is not a prefix route */
2333 return 1;
2334 }
2335 }
2336
Thomas Graf2d7202b2006-08-22 00:01:27 -07002337 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2338 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002339 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002340
2341 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 rtm->rtm_family = AF_INET6;
2343 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2344 rtm->rtm_src_len = rt->rt6i_src.plen;
2345 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002346 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002347 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002348 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002349 table = RT6_TABLE_UNSPEC;
2350 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002351 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 if (rt->rt6i_flags&RTF_REJECT)
2353 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002354 else if (rt->rt6i_flags&RTF_LOCAL)
2355 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2357 rtm->rtm_type = RTN_LOCAL;
2358 else
2359 rtm->rtm_type = RTN_UNICAST;
2360 rtm->rtm_flags = 0;
2361 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2362 rtm->rtm_protocol = rt->rt6i_protocol;
2363 if (rt->rt6i_flags&RTF_DYNAMIC)
2364 rtm->rtm_protocol = RTPROT_REDIRECT;
2365 else if (rt->rt6i_flags & RTF_ADDRCONF)
2366 rtm->rtm_protocol = RTPROT_KERNEL;
2367 else if (rt->rt6i_flags&RTF_DEFAULT)
2368 rtm->rtm_protocol = RTPROT_RA;
2369
2370 if (rt->rt6i_flags&RTF_CACHE)
2371 rtm->rtm_flags |= RTM_F_CLONED;
2372
2373 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002374 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002375 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002377 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378#ifdef CONFIG_IPV6_SUBTREES
2379 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002380 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002381 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002383 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002385 if (iif) {
2386#ifdef CONFIG_IPV6_MROUTE
2387 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002388 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002389 if (err <= 0) {
2390 if (!nowait) {
2391 if (err == 0)
2392 return 0;
2393 goto nla_put_failure;
2394 } else {
2395 if (err == -EMSGSIZE)
2396 goto nla_put_failure;
2397 }
2398 }
2399 } else
2400#endif
2401 NLA_PUT_U32(skb, RTA_IIF, iif);
2402 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002404 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002405 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002407
Daniel Walterc3968a82011-04-13 21:10:57 +00002408 if (rt->rt6i_prefsrc.plen) {
2409 struct in6_addr saddr_buf;
2410 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2411 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2412 }
2413
David S. Millerdefb3512010-12-08 21:16:57 -08002414 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002415 goto nla_put_failure;
2416
David S. Miller69cce1d2011-07-17 23:09:49 -07002417 if (dst_get_neighbour(&rt->dst))
2418 NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002419
Changli Gaod8d1f302010-06-10 23:31:35 -07002420 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002421 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2422
2423 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002424
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002425 if (!(rt->rt6i_flags & RTF_EXPIRES))
2426 expires = 0;
2427 else if (rt->rt6i_expires - jiffies < INT_MAX)
2428 expires = rt->rt6i_expires - jiffies;
2429 else
2430 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002431
Changli Gaod8d1f302010-06-10 23:31:35 -07002432 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2433 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002434 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435
Thomas Graf2d7202b2006-08-22 00:01:27 -07002436 return nlmsg_end(skb, nlh);
2437
2438nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002439 nlmsg_cancel(skb, nlh);
2440 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441}
2442
Patrick McHardy1b43af52006-08-10 23:11:17 -07002443int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444{
2445 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2446 int prefix;
2447
Thomas Graf2d7202b2006-08-22 00:01:27 -07002448 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2449 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2451 } else
2452 prefix = 0;
2453
Brian Haley191cd582008-08-14 15:33:21 -07002454 return rt6_fill_node(arg->net,
2455 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002457 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458}
2459
Thomas Grafc127ea22007-03-22 11:58:32 -07002460static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002462 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002463 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002465 struct sk_buff *skb;
2466 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002467 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002468 int err, iif = 0;
2469
2470 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2471 if (err < 0)
2472 goto errout;
2473
2474 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002475 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002476
2477 if (tb[RTA_SRC]) {
2478 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2479 goto errout;
2480
David S. Miller4c9483b2011-03-12 16:22:43 -05002481 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002482 }
2483
2484 if (tb[RTA_DST]) {
2485 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2486 goto errout;
2487
David S. Miller4c9483b2011-03-12 16:22:43 -05002488 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002489 }
2490
2491 if (tb[RTA_IIF])
2492 iif = nla_get_u32(tb[RTA_IIF]);
2493
2494 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002495 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002496
2497 if (iif) {
2498 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002499 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002500 if (!dev) {
2501 err = -ENODEV;
2502 goto errout;
2503 }
2504 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002505
2506 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002507 if (skb == NULL) {
2508 err = -ENOBUFS;
2509 goto errout;
2510 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511
2512 /* Reserve room for dummy headers, this skb can pass
2513 through good chunk of routing engine.
2514 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002515 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2517
David S. Miller4c9483b2011-03-12 16:22:43 -05002518 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002519 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520
David S. Miller4c9483b2011-03-12 16:22:43 -05002521 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002523 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002525 kfree_skb(skb);
2526 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527 }
2528
Daniel Lezcano55786892008-03-04 13:47:47 -08002529 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002530errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532}
2533
Thomas Graf86872cb2006-08-22 00:01:08 -07002534void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535{
2536 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002537 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002538 u32 seq;
2539 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002541 err = -ENOBUFS;
2542 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002543
Thomas Graf339bf982006-11-10 14:10:15 -08002544 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002545 if (skb == NULL)
2546 goto errout;
2547
Brian Haley191cd582008-08-14 15:33:21 -07002548 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002549 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002550 if (err < 0) {
2551 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2552 WARN_ON(err == -EMSGSIZE);
2553 kfree_skb(skb);
2554 goto errout;
2555 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002556 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2557 info->nlh, gfp_any());
2558 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002559errout:
2560 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002561 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562}
2563
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002564static int ip6_route_dev_notify(struct notifier_block *this,
2565 unsigned long event, void *data)
2566{
2567 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002568 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002569
2570 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002571 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002572 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2573#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002574 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002575 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002576 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002577 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2578#endif
2579 }
2580
2581 return NOTIFY_OK;
2582}
2583
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584/*
2585 * /proc
2586 */
2587
2588#ifdef CONFIG_PROC_FS
2589
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): not referenced by the seq_file based code visible in
 * this part of the file -- possibly a leftover; confirm before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2598
2599static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2600{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002601 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002602 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002604 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605
2606#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002607 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002609 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610#endif
David S. Miller69cce1d2011-07-17 23:09:49 -07002611 n = dst_get_neighbour(&rt->dst);
2612 if (n) {
2613 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002615 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002617 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002618 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2619 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002620 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 return 0;
2622}
2623
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002624static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002626 struct net *net = (struct net *)m->private;
2627 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002628 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629}
2630
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002631static int ipv6_route_open(struct inode *inode, struct file *file)
2632{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002633 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002634}
2635
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002636static const struct file_operations ipv6_route_proc_fops = {
2637 .owner = THIS_MODULE,
2638 .open = ipv6_route_open,
2639 .read = seq_read,
2640 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002641 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002642};
2643
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2645{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002646 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002648 net->ipv6.rt6_stats->fib_nodes,
2649 net->ipv6.rt6_stats->fib_route_nodes,
2650 net->ipv6.rt6_stats->fib_rt_alloc,
2651 net->ipv6.rt6_stats->fib_rt_entries,
2652 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002653 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002654 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655
2656 return 0;
2657}
2658
2659static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2660{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002661 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002662}
2663
Arjan van de Ven9a321442007-02-12 00:55:35 -08002664static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665 .owner = THIS_MODULE,
2666 .open = rt6_stats_seq_open,
2667 .read = seq_read,
2668 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002669 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670};
2671#endif /* CONFIG_PROC_FS */
2672
2673#ifdef CONFIG_SYSCTL
2674
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002676int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677 void __user *buffer, size_t *lenp, loff_t *ppos)
2678{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002679 struct net *net;
2680 int delay;
2681 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002682 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002683
2684 net = (struct net *)ctl->extra1;
2685 delay = net->ipv6.sysctl.flush_delay;
2686 proc_dointvec(ctl, write, buffer, lenp, ppos);
2687 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2688 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002689}
2690
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002691ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002692 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002694 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002696 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002697 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698 },
2699 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002700 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002701 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702 .maxlen = sizeof(int),
2703 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002704 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002705 },
2706 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002707 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002708 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002709 .maxlen = sizeof(int),
2710 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002711 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002712 },
2713 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002715 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716 .maxlen = sizeof(int),
2717 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002718 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719 },
2720 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002722 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723 .maxlen = sizeof(int),
2724 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002725 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726 },
2727 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002729 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002730 .maxlen = sizeof(int),
2731 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002732 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733 },
2734 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002736 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737 .maxlen = sizeof(int),
2738 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002739 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740 },
2741 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002742 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002743 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744 .maxlen = sizeof(int),
2745 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002746 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 },
2748 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002750 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751 .maxlen = sizeof(int),
2752 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002753 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002754 },
2755 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758 .maxlen = sizeof(int),
2759 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002760 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002762 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763};
2764
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002765struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002766{
2767 struct ctl_table *table;
2768
2769 table = kmemdup(ipv6_route_table_template,
2770 sizeof(ipv6_route_table_template),
2771 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002772
2773 if (table) {
2774 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002775 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002776 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002777 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2778 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2779 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2780 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2781 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2782 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2783 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002784 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002785 }
2786
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002787 return table;
2788}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789#endif
2790
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002791static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002792{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002793 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002794
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002795 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2796 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002797
Eric Dumazetfc66f952010-10-08 06:37:34 +00002798 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2799 goto out_ip6_dst_ops;
2800
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002801 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2802 sizeof(*net->ipv6.ip6_null_entry),
2803 GFP_KERNEL);
2804 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002805 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002806 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002807 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002808 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002809 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2810 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002811
2812#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2813 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2814 sizeof(*net->ipv6.ip6_prohibit_entry),
2815 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002816 if (!net->ipv6.ip6_prohibit_entry)
2817 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002818 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002819 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002820 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002821 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2822 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002823
2824 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2825 sizeof(*net->ipv6.ip6_blk_hole_entry),
2826 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002827 if (!net->ipv6.ip6_blk_hole_entry)
2828 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002829 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002830 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002831 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002832 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2833 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002834#endif
2835
Peter Zijlstrab339a472008-10-07 14:15:00 -07002836 net->ipv6.sysctl.flush_delay = 0;
2837 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2838 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2839 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2840 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2841 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2842 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2843 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2844
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002845#ifdef CONFIG_PROC_FS
2846 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2847 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2848#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002849 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2850
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002851 ret = 0;
2852out:
2853 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002854
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002855#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2856out_ip6_prohibit_entry:
2857 kfree(net->ipv6.ip6_prohibit_entry);
2858out_ip6_null_entry:
2859 kfree(net->ipv6.ip6_null_entry);
2860#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002861out_ip6_dst_entries:
2862 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002863out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002864 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002865}
2866
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002867static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002868{
2869#ifdef CONFIG_PROC_FS
2870 proc_net_remove(net, "ipv6_route");
2871 proc_net_remove(net, "rt6_stats");
2872#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002873 kfree(net->ipv6.ip6_null_entry);
2874#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2875 kfree(net->ipv6.ip6_prohibit_entry);
2876 kfree(net->ipv6.ip6_blk_hole_entry);
2877#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002878 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002879}
2880
2881static struct pernet_operations ip6_route_net_ops = {
2882 .init = ip6_route_net_init,
2883 .exit = ip6_route_net_exit,
2884};
2885
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002886static struct notifier_block ip6_route_dev_notifier = {
2887 .notifier_call = ip6_route_dev_notify,
2888 .priority = 0,
2889};
2890
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002891int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002892{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002893 int ret;
2894
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002895 ret = -ENOMEM;
2896 ip6_dst_ops_template.kmem_cachep =
2897 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2898 SLAB_HWCACHE_ALIGN, NULL);
2899 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002900 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002901
Eric Dumazetfc66f952010-10-08 06:37:34 +00002902 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002903 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002904 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002905
Eric Dumazetfc66f952010-10-08 06:37:34 +00002906 ret = register_pernet_subsys(&ip6_route_net_ops);
2907 if (ret)
2908 goto out_dst_entries;
2909
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002910 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2911
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002912 /* Registering of the loopback is done before this portion of code,
2913 * the loopback reference in rt6_info will not be taken, do it
2914 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002915 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002916 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2917 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002918 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002919 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002920 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002921 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2922 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002923 ret = fib6_init();
2924 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002925 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002926
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002927 ret = xfrm6_init();
2928 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002929 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002930
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002931 ret = fib6_rules_init();
2932 if (ret)
2933 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002934
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002935 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002936 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2937 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2938 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002939 goto fib6_rules_init;
2940
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002941 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002942 if (ret)
2943 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002944
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002945out:
2946 return ret;
2947
2948fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002949 fib6_rules_cleanup();
2950xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002951 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002952out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002953 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002954out_register_subsys:
2955 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002956out_dst_entries:
2957 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002958out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002959 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002960 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002961}
2962
2963void ip6_route_cleanup(void)
2964{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002965 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002966 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002967 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002968 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002969 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002970 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002971 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002972}