blob: 1250f902067016d694ea59910778555ad68702bc [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk calls; below that both compile away.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Eric Dumazet21efcfa2011-07-19 20:18:36 +000075static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080078static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080079static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080084static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080092static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000093 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080095 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080096static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000097 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080099#endif
100
David S. Miller06582542011-01-27 14:58:42 -0800101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000107 if (!(rt->dst.flags & DST_HOST))
108 return NULL;
109
David S. Miller06582542011-01-27 14:58:42 -0800110 if (!rt->rt6i_peer)
111 rt6_bind_peer(rt, 1);
112
113 peer = rt->rt6i_peer;
114 if (peer) {
115 u32 *old_p = __DST_METRICS_PTR(old);
116 unsigned long prev, new;
117
118 p = peer->metrics;
119 if (inet_metrics_new(peer))
120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
121
122 new = (unsigned long) p;
123 prev = cmpxchg(&dst->_metrics, old, new);
124
125 if (prev != old) {
126 p = __DST_METRICS_PTR(prev);
127 if (prev & DST_METRICS_READ_ONLY)
128 p = NULL;
129 }
130 }
131 return p;
132}
133
David S. Millerd3aaeb32011-07-18 00:40:17 -0700134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135{
136 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
137}
138
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800139static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800141 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 .gc = ip6_dst_gc,
143 .gc_thresh = 1024,
144 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800145 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800146 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800147 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 .destroy = ip6_dst_destroy,
149 .ifdown = ip6_dst_ifdown,
150 .negative_advice = ip6_negative_advice,
151 .link_failure = ip6_link_failure,
152 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700153 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700154 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155};
156
/* default_mtu hook of the blackhole dst_ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0U;
}
161
David S. Miller14e50e52007-05-24 18:17:54 -0700162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
163{
164}
165
Held Bernhard0972ddb2011-04-24 22:07:32 +0000166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
167 unsigned long old)
168{
169 return NULL;
170}
171
David S. Miller14e50e52007-05-24 18:17:54 -0700172static struct dst_ops ip6_dst_blackhole_ops = {
173 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800174 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700175 .destroy = ip6_dst_destroy,
176 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800177 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800178 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700179 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000180 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700181 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700182};
183
David S. Miller62fa8a82011-01-26 20:51:05 -0800184static const u32 ip6_template_metrics[RTAX_MAX] = {
185 [RTAX_HOPLIMIT - 1] = 255,
186};
187
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800188static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700189 .dst = {
190 .__refcnt = ATOMIC_INIT(1),
191 .__use = 1,
192 .obsolete = -1,
193 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700194 .input = ip6_pkt_discard,
195 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 },
197 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700198 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 .rt6i_metric = ~(u32) 0,
200 .rt6i_ref = ATOMIC_INIT(1),
201};
202
Thomas Graf101367c2006-08-04 03:39:02 -0700203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
204
David S. Miller6723ab52006-10-18 21:20:57 -0700205static int ip6_pkt_prohibit(struct sk_buff *skb);
206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700207
Adrian Bunk280a34c2008-04-21 02:29:32 -0700208static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -EACCES,
Changli Gaod8d1f302010-06-10 23:31:35 -0700214 .input = ip6_pkt_prohibit,
215 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700218 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221};
222
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800223static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700224 .dst = {
225 .__refcnt = ATOMIC_INIT(1),
226 .__use = 1,
227 .obsolete = -1,
228 .error = -EINVAL,
Changli Gaod8d1f302010-06-10 23:31:35 -0700229 .input = dst_discard,
230 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700231 },
232 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700233 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700234 .rt6i_metric = ~(u32) 0,
235 .rt6i_ref = ATOMIC_INIT(1),
236};
237
238#endif
239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700242 struct net_device *dev,
243 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
David S. Miller957c6652011-06-24 15:25:00 -0700245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700246
247 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
248
249 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
252static void ip6_dst_destroy(struct dst_entry *dst)
253{
254 struct rt6_info *rt = (struct rt6_info *)dst;
255 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800256 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000258 if (!(rt->dst.flags & DST_HOST))
259 dst_destroy_metrics_generic(dst);
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 if (idev != NULL) {
262 rt->rt6i_idev = NULL;
263 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900264 }
David S. Millerb3419362010-11-30 12:27:11 -0800265 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800266 rt->rt6i_peer = NULL;
267 inet_putpeer(peer);
268 }
269}
270
David S. Miller6431cbc2011-02-07 20:38:06 -0800271static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
272
273static u32 rt6_peer_genid(void)
274{
275 return atomic_read(&__rt6_peer_genid);
276}
277
David S. Millerb3419362010-11-30 12:27:11 -0800278void rt6_bind_peer(struct rt6_info *rt, int create)
279{
280 struct inet_peer *peer;
281
David S. Millerb3419362010-11-30 12:27:11 -0800282 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
283 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
284 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800285 else
286 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287}
288
289static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
290 int how)
291{
292 struct rt6_info *rt = (struct rt6_info *)dst;
293 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800294 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900295 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800297 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
298 struct inet6_dev *loopback_idev =
299 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 if (loopback_idev != NULL) {
301 rt->rt6i_idev = loopback_idev;
302 in6_dev_put(idev);
303 }
304 }
305}
306
307static __inline__ int rt6_check_expired(const struct rt6_info *rt)
308{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000309 return (rt->rt6i_flags & RTF_EXPIRES) &&
310 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311}
312
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000313static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700314{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000315 return ipv6_addr_type(daddr) &
316 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700317}
318
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700320 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 */
322
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800323static inline struct rt6_info *rt6_device_match(struct net *net,
324 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000325 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700327 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328{
329 struct rt6_info *local = NULL;
330 struct rt6_info *sprt;
331
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900332 if (!oif && ipv6_addr_any(saddr))
333 goto out;
334
Changli Gaod8d1f302010-06-10 23:31:35 -0700335 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900336 struct net_device *dev = sprt->rt6i_dev;
337
338 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 if (dev->ifindex == oif)
340 return sprt;
341 if (dev->flags & IFF_LOOPBACK) {
342 if (sprt->rt6i_idev == NULL ||
343 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700344 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900346 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 local->rt6i_idev->dev->ifindex == oif))
348 continue;
349 }
350 local = sprt;
351 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900352 } else {
353 if (ipv6_chk_addr(net, saddr, dev,
354 flags & RT6_LOOKUP_F_IFACE))
355 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900357 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900359 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 if (local)
361 return local;
362
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700363 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800364 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900366out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 return rt;
368}
369
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  A Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * If @rt has a neighbour whose state is not NUD_VALID and more than
 * rtr_probe_interval has elapsed since neigh->updated, stamp
 * neigh->updated and send a Neighbour Solicitation to the solicited-node
 * multicast address of the neighbour's key.  A NULL @rt is accepted.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int probe_due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Re-check the state and the rate limit under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent after the lock is dropped. */
	if (probe_due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
409
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800411 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700413static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800415 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700416 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700418 if ((dev->flags & IFF_LOOPBACK) &&
419 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
420 return 1;
421 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422}
423
Dave Jonesb6f99a22007-03-22 12:27:49 -0700424static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000426 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800427 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000428
429 rcu_read_lock();
430 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700431 if (rt->rt6i_flags & RTF_NONEXTHOP ||
432 !(rt->rt6i_flags & RTF_GATEWAY))
433 m = 1;
434 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800435 read_lock_bh(&neigh->lock);
436 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700437 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800438#ifdef CONFIG_IPV6_ROUTER_PREF
439 else if (neigh->nud_state & NUD_FAILED)
440 m = 0;
441#endif
442 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800443 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800444 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800445 } else
446 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000447 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800448 return m;
449}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800451static int rt6_score_route(struct rt6_info *rt, int oif,
452 int strict)
453{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700454 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900455
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700456 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700457 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800458 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800459#ifdef CONFIG_IPV6_ROUTER_PREF
460 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
461#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700462 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800463 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800464 return -1;
465 return m;
466}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467
David S. Millerf11e6652007-03-24 20:36:25 -0700468static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
469 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800470{
David S. Millerf11e6652007-03-24 20:36:25 -0700471 int m;
472
473 if (rt6_check_expired(rt))
474 goto out;
475
476 m = rt6_score_route(rt, oif, strict);
477 if (m < 0)
478 goto out;
479
480 if (m > *mpri) {
481 if (strict & RT6_LOOKUP_F_REACHABLE)
482 rt6_probe(match);
483 *mpri = m;
484 match = rt;
485 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
486 rt6_probe(rt);
487 }
488
489out:
490 return match;
491}
492
493static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
494 struct rt6_info *rr_head,
495 u32 metric, int oif, int strict)
496{
497 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800498 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
David S. Millerf11e6652007-03-24 20:36:25 -0700500 match = NULL;
501 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700502 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700503 match = find_match(rt, oif, strict, &mpri, match);
504 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700505 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700506 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800507
David S. Millerf11e6652007-03-24 20:36:25 -0700508 return match;
509}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800510
David S. Millerf11e6652007-03-24 20:36:25 -0700511static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
512{
513 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800514 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515
David S. Millerf11e6652007-03-24 20:36:25 -0700516 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800517 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
David S. Millerf11e6652007-03-24 20:36:25 -0700519 rt0 = fn->rr_ptr;
520 if (!rt0)
521 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
David S. Millerf11e6652007-03-24 20:36:25 -0700523 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800525 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700526 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700527 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700528
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800529 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700530 if (!next || next->rt6i_metric != rt0->rt6i_metric)
531 next = fn->leaf;
532
533 if (next != rt0)
534 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 }
536
David S. Millerf11e6652007-03-24 20:36:25 -0700537 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800538 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900540 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000541 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542}
543
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @dev:    receiving device
 * @opt:    option bytes, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address used to look up or add the route
 *
 * Returns -EINVAL for a short, inconsistent or invalid-preference option,
 * 0 otherwise.  A zero lifetime deletes an existing route; a non-zero one
 * adds the route or refreshes the flags of an existing one, then sets or
 * clears its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws the route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
617
/*
 * BACKTRACK - climb the FIB tree after a lookup ended on ip6_null_entry.
 *
 * Relies on the expanding function providing the locals `fn` and `rt`
 * and the labels `restart` and `out`.  If `rt` is the namespace's null
 * entry, move `fn` to its parent -- or, when the parent has a subtree
 * that is not `fn`, to the result of a lookup of @saddr in that subtree
 * -- until a node with RTN_RTINFO is found (jump to `restart`) or the
 * current node is a top-level root (jump to `out`).
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *parent; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			parent = fn->parent; \
			if (FIB6_SUBTREE(parent) && FIB6_SUBTREE(parent) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(parent), NULL, saddr); \
			else \
				fn = parent; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700635
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800636static struct rt6_info *ip6_pol_route_lookup(struct net *net,
637 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500638 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639{
640 struct fib6_node *fn;
641 struct rt6_info *rt;
642
Thomas Grafc71099a2006-08-04 23:20:06 -0700643 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500644 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700645restart:
646 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500647 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
648 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700649out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700650 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700651 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700652 return rt;
653
654}
655
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900656struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
657 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700658{
David S. Miller4c9483b2011-03-12 16:22:43 -0500659 struct flowi6 fl6 = {
660 .flowi6_oif = oif,
661 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700662 };
663 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700664 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700665
Thomas Grafadaa70b2006-10-13 15:01:03 -0700666 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500667 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700668 flags |= RT6_LOOKUP_F_HAS_SADDR;
669 }
670
David S. Miller4c9483b2011-03-12 16:22:43 -0500671 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700672 if (dst->error == 0)
673 return (struct rt6_info *) dst;
674
675 dst_release(dst);
676
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 return NULL;
678}
679
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900680EXPORT_SYMBOL(rt6_lookup);
681
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
687
Thomas Graf86872cb2006-08-22 00:01:08 -0700688static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689{
690 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700691 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
Thomas Grafc71099a2006-08-04 23:20:06 -0700693 table = rt->rt6i_table;
694 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700695 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
698 return err;
699}
700
Thomas Graf40e22e82006-08-22 00:00:45 -0700701int ip6_ins_rt(struct rt6_info *rt)
702{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800703 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900704 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800705 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800706 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700707}
708
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000709static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
710 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000711 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 struct rt6_info *rt;
714
715 /*
716 * Clone the route.
717 */
718
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000719 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720
721 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800722 struct neighbour *neigh;
723 int attempts = !in_softirq();
724
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900725 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
726 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000727 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900728 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900730 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
734#ifdef CONFIG_IPV6_SUBTREES
735 if (rt->rt6i_src.plen && saddr) {
736 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
737 rt->rt6i_src.plen = 128;
738 }
739#endif
740
David S. Miller14deae42009-01-04 16:04:39 -0800741 retry:
742 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
743 if (IS_ERR(neigh)) {
744 struct net *net = dev_net(rt->rt6i_dev);
745 int saved_rt_min_interval =
746 net->ipv6.sysctl.ip6_rt_gc_min_interval;
747 int saved_rt_elasticity =
748 net->ipv6.sysctl.ip6_rt_gc_elasticity;
749
750 if (attempts-- > 0) {
751 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
752 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
753
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000754 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800755
756 net->ipv6.sysctl.ip6_rt_gc_elasticity =
757 saved_rt_elasticity;
758 net->ipv6.sysctl.ip6_rt_gc_min_interval =
759 saved_rt_min_interval;
760 goto retry;
761 }
762
763 if (net_ratelimit())
764 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700765 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700766 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800767 return NULL;
768 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700769 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800771 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800773 return rt;
774}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000776static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
777 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800778{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000779 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
780
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800781 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800782 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000783 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 }
785 return rt;
786}
787
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800788static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500789 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790{
791 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800792 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700793 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800795 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700796 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700798 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
800relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700801 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800803restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500804 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
806restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700807 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800808
David S. Miller4c9483b2011-03-12 16:22:43 -0500809 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800810 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800811 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800812 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813
Changli Gaod8d1f302010-06-10 23:31:35 -0700814 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700815 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800816
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000817 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500818 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800819 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500820 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800821 else
822 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800823
Changli Gaod8d1f302010-06-10 23:31:35 -0700824 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800825 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800826
Changli Gaod8d1f302010-06-10 23:31:35 -0700827 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800828 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700829 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800830 if (!err)
831 goto out2;
832 }
833
834 if (--attempts <= 0)
835 goto out2;
836
837 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700838 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800839 * released someone could insert this route. Relookup.
840 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700841 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800842 goto relookup;
843
844out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800845 if (reachable) {
846 reachable = 0;
847 goto restart_2;
848 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700849 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700850 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700852 rt->dst.lastuse = jiffies;
853 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700854
855 return rt;
856}
857
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800858static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500859 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700860{
David S. Miller4c9483b2011-03-12 16:22:43 -0500861 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700862}
863
Thomas Grafc71099a2006-08-04 23:20:06 -0700864void ip6_route_input(struct sk_buff *skb)
865{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000866 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900867 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700868 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500869 struct flowi6 fl6 = {
870 .flowi6_iif = skb->dev->ifindex,
871 .daddr = iph->daddr,
872 .saddr = iph->saddr,
873 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
874 .flowi6_mark = skb->mark,
875 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700876 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700877
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800878 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700879 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700880
David S. Miller4c9483b2011-03-12 16:22:43 -0500881 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700882}
883
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800884static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500885 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700886{
David S. Miller4c9483b2011-03-12 16:22:43 -0500887 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700888}
889
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700890struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500891 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700892{
893 int flags = 0;
894
David S. Miller4c9483b2011-03-12 16:22:43 -0500895 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700896 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700897
David S. Miller4c9483b2011-03-12 16:22:43 -0500898 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700899 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000900 else if (sk)
901 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700902
David S. Miller4c9483b2011-03-12 16:22:43 -0500903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904}
905
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900906EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
David S. Miller2774c132011-03-01 14:59:04 -0800908struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700909{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700910 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700911 struct dst_entry *new = NULL;
912
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700913 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700914 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700915 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
916
Changli Gaod8d1f302010-06-10 23:31:35 -0700917 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700918
David S. Miller14e50e52007-05-24 18:17:54 -0700919 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800920 new->input = dst_discard;
921 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700922
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000923 if (dst_metrics_read_only(&ort->dst))
924 new->_metrics = ort->dst._metrics;
925 else
926 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700927 rt->rt6i_idev = ort->rt6i_idev;
928 if (rt->rt6i_idev)
929 in6_dev_hold(rt->rt6i_idev);
930 rt->rt6i_expires = 0;
931
932 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
933 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
934 rt->rt6i_metric = 0;
935
936 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
937#ifdef CONFIG_IPV6_SUBTREES
938 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
939#endif
940
941 dst_free(new);
942 }
943
David S. Miller69ead7a2011-03-01 14:45:33 -0800944 dst_release(dst_orig);
945 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700946}
David S. Miller14e50e52007-05-24 18:17:54 -0700947
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948/*
949 * Destination cache support functions
950 */
951
952static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
953{
954 struct rt6_info *rt;
955
956 rt = (struct rt6_info *) dst;
957
David S. Miller6431cbc2011-02-07 20:38:06 -0800958 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
959 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
960 if (!rt->rt6i_peer)
961 rt6_bind_peer(rt, 0);
962 rt->rt6i_peer_genid = rt6_peer_genid();
963 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800965 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 return NULL;
967}
968
969static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
970{
971 struct rt6_info *rt = (struct rt6_info *) dst;
972
973 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000974 if (rt->rt6i_flags & RTF_CACHE) {
975 if (rt6_check_expired(rt)) {
976 ip6_del_rt(rt);
977 dst = NULL;
978 }
979 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000981 dst = NULL;
982 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000984 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985}
986
987static void ip6_link_failure(struct sk_buff *skb)
988{
989 struct rt6_info *rt;
990
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000991 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992
Eric Dumazetadf30902009-06-02 05:19:30 +0000993 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 if (rt) {
995 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700996 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 rt->rt6i_flags |= RTF_EXPIRES;
998 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
999 rt->rt6i_node->fn_sernum = -1;
1000 }
1001}
1002
1003static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1004{
1005 struct rt6_info *rt6 = (struct rt6_info*)dst;
1006
1007 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1008 rt6->rt6i_flags |= RTF_MODIFIED;
1009 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001010 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001012 features |= RTAX_FEATURE_ALLFRAG;
1013 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 }
David S. Millerdefb3512010-12-08 21:16:57 -08001015 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 }
1017}
1018
David S. Miller0dbaee32010-12-13 12:52:14 -08001019static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
David S. Miller0dbaee32010-12-13 12:52:14 -08001021 struct net_device *dev = dst->dev;
1022 unsigned int mtu = dst_mtu(dst);
1023 struct net *net = dev_net(dev);
1024
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1026
Daniel Lezcano55786892008-03-04 13:47:47 -08001027 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1028 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
1030 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001031 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1032 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1033 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 * rely only on pmtu discovery"
1035 */
1036 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1037 mtu = IPV6_MAXPLEN;
1038 return mtu;
1039}
1040
David S. Millerd33e4552010-12-14 13:01:14 -08001041static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1042{
1043 unsigned int mtu = IPV6_MIN_MTU;
1044 struct inet6_dev *idev;
1045
1046 rcu_read_lock();
1047 idev = __in6_dev_get(dst->dev);
1048 if (idev)
1049 mtu = idev->cnf.mtu6;
1050 rcu_read_unlock();
1051
1052 return mtu;
1053}
1054
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001055static struct dst_entry *icmp6_dst_gc_list;
1056static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001057
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001058struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001060 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061{
1062 struct rt6_info *rt;
1063 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001064 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065
1066 if (unlikely(idev == NULL))
1067 return NULL;
1068
David S. Miller957c6652011-06-24 15:25:00 -07001069 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 if (unlikely(rt == NULL)) {
1071 in6_dev_put(idev);
1072 goto out;
1073 }
1074
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 if (neigh)
1076 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001077 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001079 if (IS_ERR(neigh))
1080 neigh = NULL;
1081 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001083 rt->dst.flags |= DST_HOST;
1084 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001085 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001086 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001087 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001088
1089 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1090 rt->rt6i_dst.plen = 128;
1091 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001093 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001094 rt->dst.next = icmp6_dst_gc_list;
1095 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001096 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097
Daniel Lezcano55786892008-03-04 13:47:47 -08001098 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
1100out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001101 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102}
1103
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001104int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001106 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001107 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001109 spin_lock_bh(&icmp6_dst_lock);
1110 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001111
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 while ((dst = *pprev) != NULL) {
1113 if (!atomic_read(&dst->__refcnt)) {
1114 *pprev = dst->next;
1115 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 } else {
1117 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001118 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 }
1120 }
1121
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001122 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001123
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001124 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125}
1126
David S. Miller1e493d12008-09-10 17:27:15 -07001127static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1128 void *arg)
1129{
1130 struct dst_entry *dst, **pprev;
1131
1132 spin_lock_bh(&icmp6_dst_lock);
1133 pprev = &icmp6_dst_gc_list;
1134 while ((dst = *pprev) != NULL) {
1135 struct rt6_info *rt = (struct rt6_info *) dst;
1136 if (func(rt, arg)) {
1137 *pprev = dst->next;
1138 dst_free(dst);
1139 } else {
1140 pprev = &dst->next;
1141 }
1142 }
1143 spin_unlock_bh(&icmp6_dst_lock);
1144}
1145
Daniel Lezcano569d3642008-01-18 03:56:57 -08001146static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001149 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001150 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1151 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1152 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1153 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1154 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001155 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
Eric Dumazetfc66f952010-10-08 06:37:34 +00001157 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001158 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001159 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 goto out;
1161
Benjamin Thery6891a342008-03-04 13:49:47 -08001162 net->ipv6.ip6_rt_gc_expire++;
1163 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1164 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001165 entries = dst_entries_get_slow(ops);
1166 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001167 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001169 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001170 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171}
1172
1173/* Clean host part of a prefix. Not necessary in radix tree,
1174 but results in cleaner routing tables.
1175
1176 Remove it only when all the things will work!
1177 */
1178
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001179int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180{
David S. Miller5170ae82010-12-12 21:35:57 -08001181 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001182 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001183 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001184 struct inet6_dev *idev;
1185
1186 rcu_read_lock();
1187 idev = __in6_dev_get(dev);
1188 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001189 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001190 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001191 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001192 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 }
1194 return hoplimit;
1195}
David S. Millerabbf46a2010-12-12 21:14:46 -08001196EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198/*
1199 *
1200 */
1201
Thomas Graf86872cb2006-08-22 00:01:08 -07001202int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203{
1204 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001205 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 struct rt6_info *rt = NULL;
1207 struct net_device *dev = NULL;
1208 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001209 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 int addr_type;
1211
Thomas Graf86872cb2006-08-22 00:01:08 -07001212 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 return -EINVAL;
1214#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001215 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 return -EINVAL;
1217#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001218 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001220 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 if (!dev)
1222 goto out;
1223 idev = in6_dev_get(dev);
1224 if (!idev)
1225 goto out;
1226 }
1227
Thomas Graf86872cb2006-08-22 00:01:08 -07001228 if (cfg->fc_metric == 0)
1229 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230
Daniel Lezcano55786892008-03-04 13:47:47 -08001231 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001232 if (table == NULL) {
1233 err = -ENOBUFS;
1234 goto out;
1235 }
1236
David S. Miller957c6652011-06-24 15:25:00 -07001237 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238
1239 if (rt == NULL) {
1240 err = -ENOMEM;
1241 goto out;
1242 }
1243
Changli Gaod8d1f302010-06-10 23:31:35 -07001244 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001245 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1246 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1247 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248
Thomas Graf86872cb2006-08-22 00:01:08 -07001249 if (cfg->fc_protocol == RTPROT_UNSPEC)
1250 cfg->fc_protocol = RTPROT_BOOT;
1251 rt->rt6i_protocol = cfg->fc_protocol;
1252
1253 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
1255 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001256 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001257 else if (cfg->fc_flags & RTF_LOCAL)
1258 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001260 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261
Changli Gaod8d1f302010-06-10 23:31:35 -07001262 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1265 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001267 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001269 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1270 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1271 if (!metrics) {
1272 err = -ENOMEM;
1273 goto out;
1274 }
1275 dst_init_metrics(&rt->dst, metrics, 0);
1276 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001278 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1279 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280#endif
1281
Thomas Graf86872cb2006-08-22 00:01:08 -07001282 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
1284 /* We cannot add true routes via loopback here,
1285 they would result in kernel looping; promote them to reject routes
1286 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001287 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001288 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1289 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001291 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 if (dev) {
1293 dev_put(dev);
1294 in6_dev_put(idev);
1295 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001296 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 dev_hold(dev);
1298 idev = in6_dev_get(dev);
1299 if (!idev) {
1300 err = -ENODEV;
1301 goto out;
1302 }
1303 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001304 rt->dst.output = ip6_pkt_discard_out;
1305 rt->dst.input = ip6_pkt_discard;
1306 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1308 goto install_route;
1309 }
1310
Thomas Graf86872cb2006-08-22 00:01:08 -07001311 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001312 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 int gwa_type;
1314
Thomas Graf86872cb2006-08-22 00:01:08 -07001315 gw_addr = &cfg->fc_gateway;
1316 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 gwa_type = ipv6_addr_type(gw_addr);
1318
1319 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1320 struct rt6_info *grt;
1321
1322 /* IPv6 strictly inhibits using not link-local
1323 addresses as nexthop address.
1324 Otherwise, router will not able to send redirects.
1325 It is very good, but in some (rare!) circumstances
1326 (SIT, PtP, NBMA NOARP links) it is handy to allow
1327 some exceptions. --ANK
1328 */
1329 err = -EINVAL;
1330 if (!(gwa_type&IPV6_ADDR_UNICAST))
1331 goto out;
1332
Daniel Lezcano55786892008-03-04 13:47:47 -08001333 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
1335 err = -EHOSTUNREACH;
1336 if (grt == NULL)
1337 goto out;
1338 if (dev) {
1339 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001340 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 goto out;
1342 }
1343 } else {
1344 dev = grt->rt6i_dev;
1345 idev = grt->rt6i_idev;
1346 dev_hold(dev);
1347 in6_dev_hold(grt->rt6i_idev);
1348 }
1349 if (!(grt->rt6i_flags&RTF_GATEWAY))
1350 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001351 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352
1353 if (err)
1354 goto out;
1355 }
1356 err = -EINVAL;
1357 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1358 goto out;
1359 }
1360
1361 err = -ENODEV;
1362 if (dev == NULL)
1363 goto out;
1364
Daniel Walterc3968a82011-04-13 21:10:57 +00001365 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1366 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1367 err = -EINVAL;
1368 goto out;
1369 }
1370 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1371 rt->rt6i_prefsrc.plen = 128;
1372 } else
1373 rt->rt6i_prefsrc.plen = 0;
1374
Thomas Graf86872cb2006-08-22 00:01:08 -07001375 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001376 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1377 if (IS_ERR(n)) {
1378 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 goto out;
1380 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001381 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 }
1383
Thomas Graf86872cb2006-08-22 00:01:08 -07001384 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
1386install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001387 if (cfg->fc_mx) {
1388 struct nlattr *nla;
1389 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
Thomas Graf86872cb2006-08-22 00:01:08 -07001391 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001392 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001393
1394 if (type) {
1395 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 err = -EINVAL;
1397 goto out;
1398 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001399
David S. Millerdefb3512010-12-08 21:16:57 -08001400 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 }
1403 }
1404
Changli Gaod8d1f302010-06-10 23:31:35 -07001405 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001407 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001408
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001409 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001410
Thomas Graf86872cb2006-08-22 00:01:08 -07001411 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412
1413out:
1414 if (dev)
1415 dev_put(dev);
1416 if (idev)
1417 in6_dev_put(idev);
1418 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001419 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 return err;
1421}
1422
Thomas Graf86872cb2006-08-22 00:01:08 -07001423static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424{
1425 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001426 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001427 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001429 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001430 return -ENOENT;
1431
Thomas Grafc71099a2006-08-04 23:20:06 -07001432 table = rt->rt6i_table;
1433 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
Thomas Graf86872cb2006-08-22 00:01:08 -07001435 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001436 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437
Thomas Grafc71099a2006-08-04 23:20:06 -07001438 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439
1440 return err;
1441}
1442
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001443int ip6_del_rt(struct rt6_info *rt)
1444{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001445 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001446 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001447 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001448 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001449}
1450
Thomas Graf86872cb2006-08-22 00:01:08 -07001451static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452{
Thomas Grafc71099a2006-08-04 23:20:06 -07001453 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 struct fib6_node *fn;
1455 struct rt6_info *rt;
1456 int err = -ESRCH;
1457
Daniel Lezcano55786892008-03-04 13:47:47 -08001458 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001459 if (table == NULL)
1460 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001461
Thomas Grafc71099a2006-08-04 23:20:06 -07001462 read_lock_bh(&table->tb6_lock);
1463
1464 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001465 &cfg->fc_dst, cfg->fc_dst_len,
1466 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001467
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001469 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001470 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001472 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001474 if (cfg->fc_flags & RTF_GATEWAY &&
1475 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001477 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001479 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001480 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481
Thomas Graf86872cb2006-08-22 00:01:08 -07001482 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 }
1484 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001485 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486
1487 return err;
1488}
1489
1490/*
1491 * Handle redirects
1492 */
/*
 * Lookup key used while validating a redirect: an ordinary flowi6 followed
 * by the gateway address a candidate route has to match.
 *
 * fl6 must remain the first member: __ip6_route_redirect() receives a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001498static struct rt6_info *__ip6_route_redirect(struct net *net,
1499 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001500 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001501 int flags)
1502{
David S. Miller4c9483b2011-03-12 16:22:43 -05001503 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001504 struct rt6_info *rt;
1505 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001506
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001508 * Get the "current" route for this destination and
1509 * check if the redirect has come from approriate router.
1510 *
1511 * RFC 2461 specifies that redirects should only be
1512 * accepted if they come from the nexthop to the target.
1513 * Due to the way the routes are chosen, this notion
1514 * is a bit fuzzy and one might need to check all possible
1515 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517
Thomas Grafc71099a2006-08-04 23:20:06 -07001518 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001519 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001520restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001521 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001522 /*
1523 * Current route is on-link; redirect is always invalid.
1524 *
1525 * Seems, previous statement is not true. It could
1526 * be node, which looks for us as on-link (f.e. proxy ndisc)
1527 * But then router serving it might decide, that we should
1528 * know truth 8)8) --ANK (980726).
1529 */
1530 if (rt6_check_expired(rt))
1531 continue;
1532 if (!(rt->rt6i_flags & RTF_GATEWAY))
1533 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001534 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001535 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001536 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001537 continue;
1538 break;
1539 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001540
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001541 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001542 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001543 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001544out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001545 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001546
1547 read_unlock_bh(&table->tb6_lock);
1548
1549 return rt;
1550};
1551
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001552static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1553 const struct in6_addr *src,
1554 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001555 struct net_device *dev)
1556{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001557 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001558 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001559 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001560 .fl6 = {
1561 .flowi6_oif = dev->ifindex,
1562 .daddr = *dest,
1563 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001564 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001565 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001566
Brian Haley86c36ce2009-10-07 13:58:01 -07001567 ipv6_addr_copy(&rdfl.gateway, gateway);
1568
Thomas Grafadaa70b2006-10-13 15:01:03 -07001569 if (rt6_need_strict(dest))
1570 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001571
David S. Miller4c9483b2011-03-12 16:22:43 -05001572 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001573 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001574}
1575
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001576void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1577 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001578 struct neighbour *neigh, u8 *lladdr, int on_link)
1579{
1580 struct rt6_info *rt, *nrt = NULL;
1581 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001582 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001583
1584 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1585
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001586 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 if (net_ratelimit())
1588 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1589 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001590 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 }
1592
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 /*
1594 * We have finally decided to accept it.
1595 */
1596
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001597 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1599 NEIGH_UPDATE_F_OVERRIDE|
1600 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1601 NEIGH_UPDATE_F_ISROUTER))
1602 );
1603
1604 /*
1605 * Redirect received -> path was valid.
1606 * Look, redirects are sent only in response to data packets,
1607 * so that this nexthop apparently is reachable. --ANK
1608 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001609 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
1611 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001612 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 goto out;
1614
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001615 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 if (nrt == NULL)
1617 goto out;
1618
1619 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1620 if (on_link)
1621 nrt->rt6i_flags &= ~RTF_GATEWAY;
1622
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001624 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625
Thomas Graf40e22e82006-08-22 00:00:45 -07001626 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 goto out;
1628
Changli Gaod8d1f302010-06-10 23:31:35 -07001629 netevent.old = &rt->dst;
1630 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001631 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1632
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001634 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 return;
1636 }
1637
1638out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001639 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640}
1641
1642/*
1643 * Handle ICMP "packet too big" messages
1644 * i.e. Path MTU discovery
1645 */
1646
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001647static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001648 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649{
1650 struct rt6_info *rt, *nrt;
1651 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001652again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001653 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 if (rt == NULL)
1655 return;
1656
Andrey Vagind3052b52010-12-11 15:20:11 +00001657 if (rt6_check_expired(rt)) {
1658 ip6_del_rt(rt);
1659 goto again;
1660 }
1661
Changli Gaod8d1f302010-06-10 23:31:35 -07001662 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 goto out;
1664
1665 if (pmtu < IPV6_MIN_MTU) {
1666 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001667 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 * MTU (1280) and a fragment header should always be included
1669 * after a node receiving Too Big message reporting PMTU is
1670 * less than the IPv6 Minimum Link MTU.
1671 */
1672 pmtu = IPV6_MIN_MTU;
1673 allfrag = 1;
1674 }
1675
1676 /* New mtu received -> path was valid.
1677 They are sent only in response to data packets,
1678 so that this nexthop apparently is reachable. --ANK
1679 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001680 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681
1682 /* Host route. If it is static, it would be better
1683 not to override it, but add new one, so that
1684 when cache entry will expire old pmtu
1685 would return automatically.
1686 */
1687 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001688 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1689 if (allfrag) {
1690 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1691 features |= RTAX_FEATURE_ALLFRAG;
1692 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1693 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001694 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1696 goto out;
1697 }
1698
1699 /* Network route.
1700 Two cases are possible:
1701 1. It is connected route. Action: COW
1702 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1703 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001704 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001705 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001706 else
1707 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001708
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001709 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001710 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1711 if (allfrag) {
1712 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1713 features |= RTAX_FEATURE_ALLFRAG;
1714 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1715 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001716
1717 /* According to RFC 1981, detecting PMTU increase shouldn't be
1718 * happened within 5 mins, the recommended timer is 10 mins.
1719 * Here this route expiration time is set to ip6_rt_mtu_expires
1720 * which is 10 mins. After 10 mins the decreased pmtu is expired
1721 * and detecting PMTU increase will be automatically happened.
1722 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001723 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001724 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1725
Thomas Graf40e22e82006-08-22 00:00:45 -07001726 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001729 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730}
1731
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001732void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001733 struct net_device *dev, u32 pmtu)
1734{
1735 struct net *net = dev_net(dev);
1736
1737 /*
1738 * RFC 1981 states that a node "MUST reduce the size of the packets it
1739 * is sending along the path" that caused the Packet Too Big message.
1740 * Since it's not possible in the general case to determine which
1741 * interface was used to send the original packet, we update the MTU
1742 * on the interface that will be used to send future packets. We also
1743 * update the MTU on the interface that received the Packet Too Big in
1744 * case the original packet was forced out that interface with
1745 * SO_BINDTODEVICE or similar. This is the next best thing to the
1746 * correct behaviour, which would be to update the MTU on all
1747 * interfaces.
1748 */
1749 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1750 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1751}
1752
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753/*
1754 * Misc support functions
1755 */
1756
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001757static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1758 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001760 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001761 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001762 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763
1764 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001765 rt->dst.input = ort->dst.input;
1766 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001767 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001769 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001770 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001771 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001772 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 rt->rt6i_idev = ort->rt6i_idev;
1774 if (rt->rt6i_idev)
1775 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001776 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 rt->rt6i_expires = 0;
1778
1779 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1780 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1781 rt->rt6i_metric = 0;
1782
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783#ifdef CONFIG_IPV6_SUBTREES
1784 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1785#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001786 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001787 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 }
1789 return rt;
1790}
1791
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001792#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001793static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001794 const struct in6_addr *prefix, int prefixlen,
1795 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001796{
1797 struct fib6_node *fn;
1798 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001799 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001800
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001801 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001802 if (table == NULL)
1803 return NULL;
1804
1805 write_lock_bh(&table->tb6_lock);
1806 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001807 if (!fn)
1808 goto out;
1809
Changli Gaod8d1f302010-06-10 23:31:35 -07001810 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001811 if (rt->rt6i_dev->ifindex != ifindex)
1812 continue;
1813 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1814 continue;
1815 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1816 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001817 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001818 break;
1819 }
1820out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001821 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001822 return rt;
1823}
1824
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001825static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001826 const struct in6_addr *prefix, int prefixlen,
1827 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001828 unsigned pref)
1829{
Thomas Graf86872cb2006-08-22 00:01:08 -07001830 struct fib6_config cfg = {
1831 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001832 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001833 .fc_ifindex = ifindex,
1834 .fc_dst_len = prefixlen,
1835 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1836 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001837 .fc_nlinfo.pid = 0,
1838 .fc_nlinfo.nlh = NULL,
1839 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001840 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001841
Thomas Graf86872cb2006-08-22 00:01:08 -07001842 ipv6_addr_copy(&cfg.fc_dst, prefix);
1843 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1844
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001845 /* We should treat it as a default route if prefix length is 0. */
1846 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001847 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001848
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001850
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001851 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852}
1853#endif
1854
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001855struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001856{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001858 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001860 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001861 if (table == NULL)
1862 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863
Thomas Grafc71099a2006-08-04 23:20:06 -07001864 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001865 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001867 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1869 break;
1870 }
1871 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001872 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001873 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874 return rt;
1875}
1876
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001877struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001878 struct net_device *dev,
1879 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880{
Thomas Graf86872cb2006-08-22 00:01:08 -07001881 struct fib6_config cfg = {
1882 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001883 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001884 .fc_ifindex = dev->ifindex,
1885 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1886 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001887 .fc_nlinfo.pid = 0,
1888 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001889 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001890 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891
Thomas Graf86872cb2006-08-22 00:01:08 -07001892 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893
Thomas Graf86872cb2006-08-22 00:01:08 -07001894 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 return rt6_get_dflt_router(gwaddr, dev);
1897}
1898
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001899void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900{
1901 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001902 struct fib6_table *table;
1903
1904 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001905 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001906 if (table == NULL)
1907 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908
1909restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001910 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001911 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001913 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001914 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001915 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916 goto restart;
1917 }
1918 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001919 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920}
1921
Daniel Lezcano55786892008-03-04 13:47:47 -08001922static void rtmsg_to_fib6_config(struct net *net,
1923 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001924 struct fib6_config *cfg)
1925{
1926 memset(cfg, 0, sizeof(*cfg));
1927
1928 cfg->fc_table = RT6_TABLE_MAIN;
1929 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1930 cfg->fc_metric = rtmsg->rtmsg_metric;
1931 cfg->fc_expires = rtmsg->rtmsg_info;
1932 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1933 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1934 cfg->fc_flags = rtmsg->rtmsg_flags;
1935
Daniel Lezcano55786892008-03-04 13:47:47 -08001936 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001937
Thomas Graf86872cb2006-08-22 00:01:08 -07001938 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1939 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1940 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1941}
1942
Daniel Lezcano55786892008-03-04 13:47:47 -08001943int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944{
Thomas Graf86872cb2006-08-22 00:01:08 -07001945 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 struct in6_rtmsg rtmsg;
1947 int err;
1948
1949 switch(cmd) {
1950 case SIOCADDRT: /* Add a route */
1951 case SIOCDELRT: /* Delete a route */
1952 if (!capable(CAP_NET_ADMIN))
1953 return -EPERM;
1954 err = copy_from_user(&rtmsg, arg,
1955 sizeof(struct in6_rtmsg));
1956 if (err)
1957 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001958
Daniel Lezcano55786892008-03-04 13:47:47 -08001959 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001960
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 rtnl_lock();
1962 switch (cmd) {
1963 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001964 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 break;
1966 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001967 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 break;
1969 default:
1970 err = -EINVAL;
1971 }
1972 rtnl_unlock();
1973
1974 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001975 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976
1977 return -EINVAL;
1978}
1979
1980/*
1981 * Drop the packet on the floor
1982 */
1983
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001984static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001986 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001987 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001988 switch (ipstats_mib_noroutes) {
1989 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001990 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001991 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001992 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1993 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001994 break;
1995 }
1996 /* FALLTHROUGH */
1997 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001998 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1999 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002000 break;
2001 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002002 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 kfree_skb(skb);
2004 return 0;
2005}
2006
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002007static int ip6_pkt_discard(struct sk_buff *skb)
2008{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002009 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002010}
2011
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002012static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013{
Eric Dumazetadf30902009-06-02 05:19:30 +00002014 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002015 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016}
2017
David S. Miller6723ab52006-10-18 21:20:57 -07002018#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2019
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002020static int ip6_pkt_prohibit(struct sk_buff *skb)
2021{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002022 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002023}
2024
2025static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2026{
Eric Dumazetadf30902009-06-02 05:19:30 +00002027 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002028 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002029}
2030
David S. Miller6723ab52006-10-18 21:20:57 -07002031#endif
2032
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033/*
2034 * Allocate a dst for local (unicast / anycast) address.
2035 */
2036
2037struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2038 const struct in6_addr *addr,
2039 int anycast)
2040{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002041 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002042 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002043 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002044 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045
Ben Greear40385652010-11-08 12:33:48 +00002046 if (rt == NULL) {
2047 if (net_ratelimit())
2048 pr_warning("IPv6: Maximum number of routes reached,"
2049 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002051 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 in6_dev_hold(idev);
2054
David S. Miller11d53b42011-06-24 15:23:34 -07002055 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002056 rt->dst.input = ip6_input;
2057 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002059 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060
2061 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002062 if (anycast)
2063 rt->rt6i_flags |= RTF_ANYCAST;
2064 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002066 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2067 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002068 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002069
David S. Miller29546a62011-03-03 12:10:37 -08002070 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002072 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
2074 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2075 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002076 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077
Changli Gaod8d1f302010-06-10 23:31:35 -07002078 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079
2080 return rt;
2081}
2082
Daniel Walterc3968a82011-04-13 21:10:57 +00002083int ip6_route_get_saddr(struct net *net,
2084 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002085 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002086 unsigned int prefs,
2087 struct in6_addr *saddr)
2088{
2089 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2090 int err = 0;
2091 if (rt->rt6i_prefsrc.plen)
2092 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2093 else
2094 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2095 daddr, prefs, saddr);
2096 return err;
2097}
2098
2099/* remove deleted ip from prefsrc entries */
/* Walker argument bundle consumed by fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against prefsrc */
};
2105
2106static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2107{
2108 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2109 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2110 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2111
2112 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2113 rt != net->ipv6.ip6_null_entry &&
2114 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2115 /* remove prefsrc entry */
2116 rt->rt6i_prefsrc.plen = 0;
2117 }
2118 return 0;
2119}
2120
2121void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2122{
2123 struct net *net = dev_net(ifp->idev->dev);
2124 struct arg_dev_net_ip adni = {
2125 .dev = ifp->idev->dev,
2126 .net = net,
2127 .addr = &ifp->addr,
2128 };
2129 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2130}
2131
/* Walker argument bundle consumed by fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2136
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137static int fib6_ifdown(struct rt6_info *rt, void *arg)
2138{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002139 const struct arg_dev_net *adn = arg;
2140 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002141
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002142 if ((rt->rt6i_dev == dev || dev == NULL) &&
2143 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 RT6_TRACE("deleted by ifdown %p\n", rt);
2145 return -1;
2146 }
2147 return 0;
2148}
2149
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002150void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002152 struct arg_dev_net adn = {
2153 .dev = dev,
2154 .net = net,
2155 };
2156
2157 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002158 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159}
2160
/* Walker argument bundle consumed by rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2166
2167static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2168{
2169 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2170 struct inet6_dev *idev;
2171
2172 /* In IPv6 pmtu discovery is not optional,
2173 so that RTAX_MTU lock cannot disable it.
2174 We still use this lock to block changes
2175 caused by addrconf/ndisc.
2176 */
2177
2178 idev = __in6_dev_get(arg->dev);
2179 if (idev == NULL)
2180 return 0;
2181
2182 /* For administrative MTU increase, there is no way to discover
2183 IPv6 PMTU increase, so PMTU increase should be updated here.
2184 Since RFC 1981 doesn't include administrative MTU increase
2185 update PMTU increase is a MUST. (i.e. jumbo frame)
2186 */
2187 /*
2188 If new MTU is less than route PMTU, this new MTU will be the
2189 lowest MTU in the path, update the route PMTU to reflect PMTU
2190 decreases; if new MTU is greater than route PMTU, and the
2191 old MTU is the lowest MTU in the path, update the route PMTU
2192 to reflect the increase. In this case if the other nodes' MTU
2193 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2194 PMTU discouvery.
2195 */
2196 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002197 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2198 (dst_mtu(&rt->dst) >= arg->mtu ||
2199 (dst_mtu(&rt->dst) < arg->mtu &&
2200 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002201 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 return 0;
2204}
2205
2206void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2207{
Thomas Grafc71099a2006-08-04 23:20:06 -07002208 struct rt6_mtu_change_arg arg = {
2209 .dev = dev,
2210 .mtu = mtu,
2211 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002213 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214}
2215
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002216static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002217 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002218 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002219 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002220 [RTA_PRIORITY] = { .type = NLA_U32 },
2221 [RTA_METRICS] = { .type = NLA_NESTED },
2222};
2223
2224static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2225 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226{
Thomas Graf86872cb2006-08-22 00:01:08 -07002227 struct rtmsg *rtm;
2228 struct nlattr *tb[RTA_MAX+1];
2229 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230
Thomas Graf86872cb2006-08-22 00:01:08 -07002231 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2232 if (err < 0)
2233 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
Thomas Graf86872cb2006-08-22 00:01:08 -07002235 err = -EINVAL;
2236 rtm = nlmsg_data(nlh);
2237 memset(cfg, 0, sizeof(*cfg));
2238
2239 cfg->fc_table = rtm->rtm_table;
2240 cfg->fc_dst_len = rtm->rtm_dst_len;
2241 cfg->fc_src_len = rtm->rtm_src_len;
2242 cfg->fc_flags = RTF_UP;
2243 cfg->fc_protocol = rtm->rtm_protocol;
2244
2245 if (rtm->rtm_type == RTN_UNREACHABLE)
2246 cfg->fc_flags |= RTF_REJECT;
2247
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002248 if (rtm->rtm_type == RTN_LOCAL)
2249 cfg->fc_flags |= RTF_LOCAL;
2250
Thomas Graf86872cb2006-08-22 00:01:08 -07002251 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2252 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002253 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002254
2255 if (tb[RTA_GATEWAY]) {
2256 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2257 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002259
2260 if (tb[RTA_DST]) {
2261 int plen = (rtm->rtm_dst_len + 7) >> 3;
2262
2263 if (nla_len(tb[RTA_DST]) < plen)
2264 goto errout;
2265
2266 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002268
2269 if (tb[RTA_SRC]) {
2270 int plen = (rtm->rtm_src_len + 7) >> 3;
2271
2272 if (nla_len(tb[RTA_SRC]) < plen)
2273 goto errout;
2274
2275 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002277
Daniel Walterc3968a82011-04-13 21:10:57 +00002278 if (tb[RTA_PREFSRC])
2279 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2280
Thomas Graf86872cb2006-08-22 00:01:08 -07002281 if (tb[RTA_OIF])
2282 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2283
2284 if (tb[RTA_PRIORITY])
2285 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2286
2287 if (tb[RTA_METRICS]) {
2288 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2289 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002291
2292 if (tb[RTA_TABLE])
2293 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2294
2295 err = 0;
2296errout:
2297 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298}
2299
Thomas Grafc127ea22007-03-22 11:58:32 -07002300static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301{
Thomas Graf86872cb2006-08-22 00:01:08 -07002302 struct fib6_config cfg;
2303 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304
Thomas Graf86872cb2006-08-22 00:01:08 -07002305 err = rtm_to_fib6_config(skb, nlh, &cfg);
2306 if (err < 0)
2307 return err;
2308
2309 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310}
2311
Thomas Grafc127ea22007-03-22 11:58:32 -07002312static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313{
Thomas Graf86872cb2006-08-22 00:01:08 -07002314 struct fib6_config cfg;
2315 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316
Thomas Graf86872cb2006-08-22 00:01:08 -07002317 err = rtm_to_fib6_config(skb, nlh, &cfg);
2318 if (err < 0)
2319 return err;
2320
2321 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322}
2323
Thomas Graf339bf982006-11-10 14:10:15 -08002324static inline size_t rt6_nlmsg_size(void)
2325{
2326 return NLMSG_ALIGN(sizeof(struct rtmsg))
2327 + nla_total_size(16) /* RTA_SRC */
2328 + nla_total_size(16) /* RTA_DST */
2329 + nla_total_size(16) /* RTA_GATEWAY */
2330 + nla_total_size(16) /* RTA_PREFSRC */
2331 + nla_total_size(4) /* RTA_TABLE */
2332 + nla_total_size(4) /* RTA_IIF */
2333 + nla_total_size(4) /* RTA_OIF */
2334 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002335 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002336 + nla_total_size(sizeof(struct rta_cacheinfo));
2337}
2338
Brian Haley191cd582008-08-14 15:33:21 -07002339static int rt6_fill_node(struct net *net,
2340 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002341 struct in6_addr *dst, struct in6_addr *src,
2342 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002343 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344{
2345 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002346 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002347 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002348 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002349 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350
2351 if (prefix) { /* user wants prefix routes only */
2352 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2353 /* success since this is not a prefix route */
2354 return 1;
2355 }
2356 }
2357
Thomas Graf2d7202b2006-08-22 00:01:27 -07002358 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2359 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002360 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002361
2362 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 rtm->rtm_family = AF_INET6;
2364 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2365 rtm->rtm_src_len = rt->rt6i_src.plen;
2366 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002367 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002368 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002369 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002370 table = RT6_TABLE_UNSPEC;
2371 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002372 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 if (rt->rt6i_flags&RTF_REJECT)
2374 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002375 else if (rt->rt6i_flags&RTF_LOCAL)
2376 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2378 rtm->rtm_type = RTN_LOCAL;
2379 else
2380 rtm->rtm_type = RTN_UNICAST;
2381 rtm->rtm_flags = 0;
2382 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2383 rtm->rtm_protocol = rt->rt6i_protocol;
2384 if (rt->rt6i_flags&RTF_DYNAMIC)
2385 rtm->rtm_protocol = RTPROT_REDIRECT;
2386 else if (rt->rt6i_flags & RTF_ADDRCONF)
2387 rtm->rtm_protocol = RTPROT_KERNEL;
2388 else if (rt->rt6i_flags&RTF_DEFAULT)
2389 rtm->rtm_protocol = RTPROT_RA;
2390
2391 if (rt->rt6i_flags&RTF_CACHE)
2392 rtm->rtm_flags |= RTM_F_CLONED;
2393
2394 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002395 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002396 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002398 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399#ifdef CONFIG_IPV6_SUBTREES
2400 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002401 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002402 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002404 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002406 if (iif) {
2407#ifdef CONFIG_IPV6_MROUTE
2408 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002409 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002410 if (err <= 0) {
2411 if (!nowait) {
2412 if (err == 0)
2413 return 0;
2414 goto nla_put_failure;
2415 } else {
2416 if (err == -EMSGSIZE)
2417 goto nla_put_failure;
2418 }
2419 }
2420 } else
2421#endif
2422 NLA_PUT_U32(skb, RTA_IIF, iif);
2423 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002425 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002426 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002428
Daniel Walterc3968a82011-04-13 21:10:57 +00002429 if (rt->rt6i_prefsrc.plen) {
2430 struct in6_addr saddr_buf;
2431 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2432 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2433 }
2434
David S. Millerdefb3512010-12-08 21:16:57 -08002435 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002436 goto nla_put_failure;
2437
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002438 rcu_read_lock();
2439 n = dst_get_neighbour(&rt->dst);
2440 if (n)
2441 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2442 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002443
Changli Gaod8d1f302010-06-10 23:31:35 -07002444 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002445 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2446
2447 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002448
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002449 if (!(rt->rt6i_flags & RTF_EXPIRES))
2450 expires = 0;
2451 else if (rt->rt6i_expires - jiffies < INT_MAX)
2452 expires = rt->rt6i_expires - jiffies;
2453 else
2454 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002455
Changli Gaod8d1f302010-06-10 23:31:35 -07002456 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2457 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002458 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459
Thomas Graf2d7202b2006-08-22 00:01:27 -07002460 return nlmsg_end(skb, nlh);
2461
2462nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002463 nlmsg_cancel(skb, nlh);
2464 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465}
2466
Patrick McHardy1b43af52006-08-10 23:11:17 -07002467int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468{
2469 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2470 int prefix;
2471
Thomas Graf2d7202b2006-08-22 00:01:27 -07002472 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2473 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2475 } else
2476 prefix = 0;
2477
Brian Haley191cd582008-08-14 15:33:21 -07002478 return rt6_fill_node(arg->net,
2479 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002481 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482}
2483
Thomas Grafc127ea22007-03-22 11:58:32 -07002484static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002486 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002487 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002489 struct sk_buff *skb;
2490 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002491 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002492 int err, iif = 0;
2493
2494 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2495 if (err < 0)
2496 goto errout;
2497
2498 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002499 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002500
2501 if (tb[RTA_SRC]) {
2502 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2503 goto errout;
2504
David S. Miller4c9483b2011-03-12 16:22:43 -05002505 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002506 }
2507
2508 if (tb[RTA_DST]) {
2509 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2510 goto errout;
2511
David S. Miller4c9483b2011-03-12 16:22:43 -05002512 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002513 }
2514
2515 if (tb[RTA_IIF])
2516 iif = nla_get_u32(tb[RTA_IIF]);
2517
2518 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002519 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002520
2521 if (iif) {
2522 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002523 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002524 if (!dev) {
2525 err = -ENODEV;
2526 goto errout;
2527 }
2528 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529
2530 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002531 if (skb == NULL) {
2532 err = -ENOBUFS;
2533 goto errout;
2534 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535
2536 /* Reserve room for dummy headers, this skb can pass
2537 through good chunk of routing engine.
2538 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002539 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2541
David S. Miller4c9483b2011-03-12 16:22:43 -05002542 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002543 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544
David S. Miller4c9483b2011-03-12 16:22:43 -05002545 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002547 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002549 kfree_skb(skb);
2550 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 }
2552
Daniel Lezcano55786892008-03-04 13:47:47 -08002553 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002554errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556}
2557
Thomas Graf86872cb2006-08-22 00:01:08 -07002558void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559{
2560 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002561 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002562 u32 seq;
2563 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002565 err = -ENOBUFS;
2566 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002567
Thomas Graf339bf982006-11-10 14:10:15 -08002568 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002569 if (skb == NULL)
2570 goto errout;
2571
Brian Haley191cd582008-08-14 15:33:21 -07002572 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002573 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002574 if (err < 0) {
2575 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2576 WARN_ON(err == -EMSGSIZE);
2577 kfree_skb(skb);
2578 goto errout;
2579 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002580 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2581 info->nlh, gfp_any());
2582 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002583errout:
2584 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002585 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586}
2587
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002588static int ip6_route_dev_notify(struct notifier_block *this,
2589 unsigned long event, void *data)
2590{
2591 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002592 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002593
2594 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002595 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002596 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2597#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002598 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002599 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002600 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002601 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2602#endif
2603 }
2604
2605 return NOTIFY_OK;
2606}
2607
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608/*
2609 * /proc
2610 */
2611
2612#ifdef CONFIG_PROC_FS
2613
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no code visible in this part of the file references this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2622
2623static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2624{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002625 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002626 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002628 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629
2630#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002631 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002633 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002635 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002636 n = dst_get_neighbour(&rt->dst);
2637 if (n) {
2638 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002640 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002642 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002643 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002644 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2645 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002646 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 return 0;
2648}
2649
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002650static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002652 struct net *net = (struct net *)m->private;
2653 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002654 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655}
2656
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002657static int ipv6_route_open(struct inode *inode, struct file *file)
2658{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002659 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002660}
2661
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002662static const struct file_operations ipv6_route_proc_fops = {
2663 .owner = THIS_MODULE,
2664 .open = ipv6_route_open,
2665 .read = seq_read,
2666 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002667 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002668};
2669
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2671{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002672 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002673 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002674 net->ipv6.rt6_stats->fib_nodes,
2675 net->ipv6.rt6_stats->fib_route_nodes,
2676 net->ipv6.rt6_stats->fib_rt_alloc,
2677 net->ipv6.rt6_stats->fib_rt_entries,
2678 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002679 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002680 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681
2682 return 0;
2683}
2684
2685static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2686{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002687 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002688}
2689
Arjan van de Ven9a321442007-02-12 00:55:35 -08002690static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002691 .owner = THIS_MODULE,
2692 .open = rt6_stats_seq_open,
2693 .read = seq_read,
2694 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002695 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002696};
2697#endif /* CONFIG_PROC_FS */
2698
2699#ifdef CONFIG_SYSCTL
2700
Linus Torvalds1da177e2005-04-16 15:20:36 -07002701static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002702int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703 void __user *buffer, size_t *lenp, loff_t *ppos)
2704{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002705 struct net *net;
2706 int delay;
2707 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002709
2710 net = (struct net *)ctl->extra1;
2711 delay = net->ipv6.sysctl.flush_delay;
2712 proc_dointvec(ctl, write, buffer, lenp, ppos);
2713 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2714 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002715}
2716
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002717ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002718 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002720 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002722 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002723 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 },
2725 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002726 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002727 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728 .maxlen = sizeof(int),
2729 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002730 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 },
2732 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002734 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735 .maxlen = sizeof(int),
2736 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002737 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002738 },
2739 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002741 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002742 .maxlen = sizeof(int),
2743 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002744 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745 },
2746 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002748 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 .maxlen = sizeof(int),
2750 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002751 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752 },
2753 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002754 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002755 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 .maxlen = sizeof(int),
2757 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002758 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002759 },
2760 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002762 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763 .maxlen = sizeof(int),
2764 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002765 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766 },
2767 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002769 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 .maxlen = sizeof(int),
2771 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002772 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773 },
2774 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002776 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002777 .maxlen = sizeof(int),
2778 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002779 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780 },
2781 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002782 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002783 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784 .maxlen = sizeof(int),
2785 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002786 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002787 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002788 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002789};
2790
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002791struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002792{
2793 struct ctl_table *table;
2794
2795 table = kmemdup(ipv6_route_table_template,
2796 sizeof(ipv6_route_table_template),
2797 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002798
2799 if (table) {
2800 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002801 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002802 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002803 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2804 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2805 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2806 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2807 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2808 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2809 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002810 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002811 }
2812
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002813 return table;
2814}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002815#endif
2816
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002817static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002818{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002819 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002820
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002821 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2822 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002823
Eric Dumazetfc66f952010-10-08 06:37:34 +00002824 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2825 goto out_ip6_dst_ops;
2826
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002827 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2828 sizeof(*net->ipv6.ip6_null_entry),
2829 GFP_KERNEL);
2830 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002831 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002832 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002833 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002834 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002835 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2836 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002837
2838#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2839 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2840 sizeof(*net->ipv6.ip6_prohibit_entry),
2841 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002842 if (!net->ipv6.ip6_prohibit_entry)
2843 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002844 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002845 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002846 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002847 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2848 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002849
2850 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2851 sizeof(*net->ipv6.ip6_blk_hole_entry),
2852 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002853 if (!net->ipv6.ip6_blk_hole_entry)
2854 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002855 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002856 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002857 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002858 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2859 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002860#endif
2861
Peter Zijlstrab339a472008-10-07 14:15:00 -07002862 net->ipv6.sysctl.flush_delay = 0;
2863 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2864 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2865 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2866 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2867 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2868 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2869 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2870
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002871#ifdef CONFIG_PROC_FS
2872 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2873 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2874#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002875 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2876
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002877 ret = 0;
2878out:
2879 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002880
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002881#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2882out_ip6_prohibit_entry:
2883 kfree(net->ipv6.ip6_prohibit_entry);
2884out_ip6_null_entry:
2885 kfree(net->ipv6.ip6_null_entry);
2886#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002887out_ip6_dst_entries:
2888 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002889out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002890 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002891}
2892
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002893static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002894{
2895#ifdef CONFIG_PROC_FS
2896 proc_net_remove(net, "ipv6_route");
2897 proc_net_remove(net, "rt6_stats");
2898#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002899 kfree(net->ipv6.ip6_null_entry);
2900#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2901 kfree(net->ipv6.ip6_prohibit_entry);
2902 kfree(net->ipv6.ip6_blk_hole_entry);
2903#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002904 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002905}
2906
2907static struct pernet_operations ip6_route_net_ops = {
2908 .init = ip6_route_net_init,
2909 .exit = ip6_route_net_exit,
2910};
2911
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002912static struct notifier_block ip6_route_dev_notifier = {
2913 .notifier_call = ip6_route_dev_notify,
2914 .priority = 0,
2915};
2916
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002917int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002918{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002919 int ret;
2920
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002921 ret = -ENOMEM;
2922 ip6_dst_ops_template.kmem_cachep =
2923 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2924 SLAB_HWCACHE_ALIGN, NULL);
2925 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002926 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002927
Eric Dumazetfc66f952010-10-08 06:37:34 +00002928 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002929 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002930 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002931
Eric Dumazetfc66f952010-10-08 06:37:34 +00002932 ret = register_pernet_subsys(&ip6_route_net_ops);
2933 if (ret)
2934 goto out_dst_entries;
2935
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002936 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2937
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002938 /* Registering of the loopback is done before this portion of code,
2939 * the loopback reference in rt6_info will not be taken, do it
2940 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002941 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002942 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2943 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002944 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002945 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002946 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002947 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002949 ret = fib6_init();
2950 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002951 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002952
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002953 ret = xfrm6_init();
2954 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002955 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002956
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002957 ret = fib6_rules_init();
2958 if (ret)
2959 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002960
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002961 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002962 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2963 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2964 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002965 goto fib6_rules_init;
2966
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002967 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002968 if (ret)
2969 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002970
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002971out:
2972 return ret;
2973
2974fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002975 fib6_rules_cleanup();
2976xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002977 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002978out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002979 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002980out_register_subsys:
2981 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002982out_dst_entries:
2983 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002984out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002985 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002986 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002987}
2988
2989void ip6_route_cleanup(void)
2990{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002991 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002992 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002993 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002994 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002995 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002996 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002997 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998}