blob: 7946b53692da1ae28d5c882c8b06e913b684cd5e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing; at lower
 * levels both RDBG() and RT6_TRACE() expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
Linus Torvalds1da177e2005-04-16 15:20:36 -070075static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080077static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080078static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070079static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
#endif
99
David S. Miller06582542011-01-27 14:58:42 -0800100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800130static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800132 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800136 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800137 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800138 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700144 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145};
146
Roland Dreierec831ea2011-01-31 13:16:00 -0800147static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
148{
149 return 0;
150}
151
David S. Miller14e50e52007-05-24 18:17:54 -0700152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800158 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800161 .default_mtu = ip6_blackhole_default_mtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700162 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700163};
164
David S. Miller62fa8a82011-01-26 20:51:05 -0800165static const u32 ip6_template_metrics[RTAX_MAX] = {
166 [RTAX_HOPLIMIT - 1] = 255,
167};
168
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800169static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .obsolete = -1,
174 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700175 .input = ip6_pkt_discard,
176 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 },
178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700179 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
182};
183
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the prohibit entry; defined later in this file. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * carries -EACCES and uses the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * carries -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
220
/*
 * Allocate a dst from @ops and hand it back as an rt6_info.
 * Returns whatever dst_alloc() returned, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops, 0);

	return (struct rt6_info *)dst;
}
226
227static void ip6_dst_destroy(struct dst_entry *dst)
228{
229 struct rt6_info *rt = (struct rt6_info *)dst;
230 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800231 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
233 if (idev != NULL) {
234 rt->rt6i_idev = NULL;
235 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900236 }
David S. Millerb3419362010-11-30 12:27:11 -0800237 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800238 rt->rt6i_peer = NULL;
239 inet_putpeer(peer);
240 }
241}
242
David S. Miller6431cbc2011-02-07 20:38:06 -0800243static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
244
245static u32 rt6_peer_genid(void)
246{
247 return atomic_read(&__rt6_peer_genid);
248}
249
David S. Millerb3419362010-11-30 12:27:11 -0800250void rt6_bind_peer(struct rt6_info *rt, int create)
251{
252 struct inet_peer *peer;
253
David S. Millerb3419362010-11-30 12:27:11 -0800254 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
255 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
256 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800257 else
258 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259}
260
261static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
262 int how)
263{
264 struct rt6_info *rt = (struct rt6_info *)dst;
265 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800266 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900267 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800269 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
270 struct inet6_dev *loopback_idev =
271 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 if (loopback_idev != NULL) {
273 rt->rt6i_idev = loopback_idev;
274 in6_dev_put(idev);
275 }
276 }
277}
278
279static __inline__ int rt6_check_expired(const struct rt6_info *rt)
280{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000281 return (rt->rt6i_flags & RTF_EXPIRES) &&
282 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283}
284
Thomas Grafc71099a2006-08-04 23:20:06 -0700285static inline int rt6_need_strict(struct in6_addr *daddr)
286{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000287 return ipv6_addr_type(daddr) &
288 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700289}
290
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700292 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 */
294
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800295static inline struct rt6_info *rt6_device_match(struct net *net,
296 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900297 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700299 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300{
301 struct rt6_info *local = NULL;
302 struct rt6_info *sprt;
303
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900304 if (!oif && ipv6_addr_any(saddr))
305 goto out;
306
Changli Gaod8d1f302010-06-10 23:31:35 -0700307 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900308 struct net_device *dev = sprt->rt6i_dev;
309
310 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 if (dev->ifindex == oif)
312 return sprt;
313 if (dev->flags & IFF_LOOPBACK) {
314 if (sprt->rt6i_idev == NULL ||
315 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700316 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900318 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319 local->rt6i_idev->dev->ifindex == oif))
320 continue;
321 }
322 local = sprt;
323 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900324 } else {
325 if (ipv6_chk_addr(net, saddr, dev,
326 flags & RT6_LOOKUP_F_IFACE))
327 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900329 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900331 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 if (local)
333 return local;
334
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700335 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800336 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900338out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 return rt;
340}
341
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's next-hop neighbour entry is not in a NUD_VALID state
 * and the last update is older than the interface's
 * rtr_probe_interval, send a neighbour solicitation to the
 * solicited-node multicast address of the next hop.  Router
 * Reachability Probes MUST be rate-limited to no more than one per
 * minute, which is what the interval check enforces.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * neigh->updated doubles as the rate-limit timestamp and is modified
 * here, so the write side of neigh->lock is taken: with only the read
 * side held, concurrent callers could all pass the interval check and
 * each send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe;

	/* Cheap unlocked check first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	probe = !(neigh->nud_state & NUD_VALID) &&
		time_after(jiffies,
			   neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe)
		neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	if (!probe)
		return;

	/* The solicitation is sent after the neighbour lock is dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
376
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700380static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800382 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700383 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800384 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700385 if ((dev->flags & IFF_LOOPBACK) &&
386 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
387 return 1;
388 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389}
390
Dave Jonesb6f99a22007-03-22 12:27:49 -0700391static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800393 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800394 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700395 if (rt->rt6i_flags & RTF_NONEXTHOP ||
396 !(rt->rt6i_flags & RTF_GATEWAY))
397 m = 1;
398 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800399 read_lock_bh(&neigh->lock);
400 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700401 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800402#ifdef CONFIG_IPV6_ROUTER_PREF
403 else if (neigh->nud_state & NUD_FAILED)
404 m = 0;
405#endif
406 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800407 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800408 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800409 } else
410 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800411 return m;
412}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414static int rt6_score_route(struct rt6_info *rt, int oif,
415 int strict)
416{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700417 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900418
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700419 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700420 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800421 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800422#ifdef CONFIG_IPV6_ROUTER_PREF
423 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
424#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700425 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800426 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427 return -1;
428 return m;
429}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
David S. Millerf11e6652007-03-24 20:36:25 -0700431static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
432 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800433{
David S. Millerf11e6652007-03-24 20:36:25 -0700434 int m;
435
436 if (rt6_check_expired(rt))
437 goto out;
438
439 m = rt6_score_route(rt, oif, strict);
440 if (m < 0)
441 goto out;
442
443 if (m > *mpri) {
444 if (strict & RT6_LOOKUP_F_REACHABLE)
445 rt6_probe(match);
446 *mpri = m;
447 match = rt;
448 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
449 rt6_probe(rt);
450 }
451
452out:
453 return match;
454}
455
456static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
457 struct rt6_info *rr_head,
458 u32 metric, int oif, int strict)
459{
460 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800461 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
David S. Millerf11e6652007-03-24 20:36:25 -0700463 match = NULL;
464 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700465 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700466 match = find_match(rt, oif, strict, &mpri, match);
467 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700468 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700469 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800470
David S. Millerf11e6652007-03-24 20:36:25 -0700471 return match;
472}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473
David S. Millerf11e6652007-03-24 20:36:25 -0700474static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
475{
476 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800477 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478
David S. Millerf11e6652007-03-24 20:36:25 -0700479 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800480 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
David S. Millerf11e6652007-03-24 20:36:25 -0700482 rt0 = fn->rr_ptr;
483 if (!rt0)
484 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485
David S. Millerf11e6652007-03-24 20:36:25 -0700486 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800488 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700489 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700490 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700491
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800492 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700493 if (!next || next->rt6i_metric != rt0->rt6i_metric)
494 next = fn->leaf;
495
496 if (next != rt0)
497 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 }
499
David S. Millerf11e6652007-03-24 20:36:25 -0700500 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800501 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900503 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000504 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505}
506
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt points at the option (interpreted as struct route_info), @len
 * is its length in bytes and @gwaddr is the gateway to use for the
 * route.
 *
 * After validating the option, the matching route-info route is
 * looked up.  A zero lifetime deletes an existing route; a non-zero
 * lifetime creates the route if it is missing, or refreshes the
 * preference bits of an existing one.  The expiry is then set from
 * the lifetime, or cleared when the lifetime is not finite.
 *
 * Returns 0 on success, -EINVAL when the option is malformed or
 * carries the invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	} else {
		/* Option carries a full 16-byte prefix; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
580
/*
 * BACKTRACK(net, saddr): climb the FIB tree after a failed match.
 *
 * Expands in a lookup function that provides the locals `rt` and `fn`
 * and the labels `restart` and `out`.  If `rt` is the namespace's
 * null entry, walk up from `fn`: stop at the top-level root (goto
 * out); at each parent, descend into its source subtree via
 * fib6_lookup() unless we just came from that subtree; as soon as a
 * node carrying route info is reached, retry the match (goto restart).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700598
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800599static struct rt6_info *ip6_pol_route_lookup(struct net *net,
600 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700601 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602{
603 struct fib6_node *fn;
604 struct rt6_info *rt;
605
Thomas Grafc71099a2006-08-04 23:20:06 -0700606 read_lock_bh(&table->tb6_lock);
607 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
608restart:
609 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900610 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800611 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700612out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700613 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700614 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700615 return rt;
616
617}
618
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900619struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
620 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700621{
622 struct flowi fl = {
623 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000624 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700625 };
626 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700627 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700628
Thomas Grafadaa70b2006-10-13 15:01:03 -0700629 if (saddr) {
630 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
631 flags |= RT6_LOOKUP_F_HAS_SADDR;
632 }
633
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800634 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700635 if (dst->error == 0)
636 return (struct rt6_info *) dst;
637
638 dst_release(dst);
639
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 return NULL;
641}
642
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900643EXPORT_SYMBOL(rt6_lookup);
644
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
650
Thomas Graf86872cb2006-08-22 00:01:08 -0700651static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652{
653 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
Thomas Grafc71099a2006-08-04 23:20:06 -0700656 table = rt->rt6i_table;
657 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700658 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700659 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661 return err;
662}
663
Thomas Graf40e22e82006-08-22 00:00:45 -0700664int ip6_ins_rt(struct rt6_info *rt)
665{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800666 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900667 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800668 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800669 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700670}
671
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800672static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
673 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 struct rt6_info *rt;
676
677 /*
678 * Clone the route.
679 */
680
681 rt = ip6_rt_copy(ort);
682
683 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800684 struct neighbour *neigh;
685 int attempts = !in_softirq();
686
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900687 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
688 if (rt->rt6i_dst.plen != 128 &&
689 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
690 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900692 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700697 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698
699#ifdef CONFIG_IPV6_SUBTREES
700 if (rt->rt6i_src.plen && saddr) {
701 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
702 rt->rt6i_src.plen = 128;
703 }
704#endif
705
David S. Miller14deae42009-01-04 16:04:39 -0800706 retry:
707 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
708 if (IS_ERR(neigh)) {
709 struct net *net = dev_net(rt->rt6i_dev);
710 int saved_rt_min_interval =
711 net->ipv6.sysctl.ip6_rt_gc_min_interval;
712 int saved_rt_elasticity =
713 net->ipv6.sysctl.ip6_rt_gc_elasticity;
714
715 if (attempts-- > 0) {
716 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
717 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
718
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000719 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800720
721 net->ipv6.sysctl.ip6_rt_gc_elasticity =
722 saved_rt_elasticity;
723 net->ipv6.sysctl.ip6_rt_gc_min_interval =
724 saved_rt_min_interval;
725 goto retry;
726 }
727
728 if (net_ratelimit())
729 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700730 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700731 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800732 return NULL;
733 }
734 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800736 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800738 return rt;
739}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800741static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
742{
743 struct rt6_info *rt = ip6_rt_copy(ort);
744 if (rt) {
745 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
746 rt->rt6i_dst.plen = 128;
747 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700748 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800749 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
750 }
751 return rt;
752}
753
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800754static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
755 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756{
757 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800758 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700759 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800761 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700762 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700764 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
766relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700767 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800769restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700770 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
772restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700773 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774
775 BACKTRACK(net, &fl->fl6_src);
776 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800777 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800778 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
Changli Gaod8d1f302010-06-10 23:31:35 -0700780 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700781 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800782
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800783 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800784 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
David S. Millerd80bc0f2011-01-24 16:01:58 -0800785 else
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800786 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800787
Changli Gaod8d1f302010-06-10 23:31:35 -0700788 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800789 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800790
Changli Gaod8d1f302010-06-10 23:31:35 -0700791 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800792 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700793 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800794 if (!err)
795 goto out2;
796 }
797
798 if (--attempts <= 0)
799 goto out2;
800
801 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700802 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800803 * released someone could insert this route. Relookup.
804 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700805 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800806 goto relookup;
807
808out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800809 if (reachable) {
810 reachable = 0;
811 goto restart_2;
812 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700813 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700814 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700816 rt->dst.lastuse = jiffies;
817 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700818
819 return rt;
820}
821
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800822static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700823 struct flowi *fl, int flags)
824{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800825 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700826}
827
Thomas Grafc71099a2006-08-04 23:20:06 -0700828void ip6_route_input(struct sk_buff *skb)
829{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700830 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900831 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700832 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700833 struct flowi fl = {
834 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000835 .fl6_dst = iph->daddr,
836 .fl6_src = iph->saddr,
837 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900838 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700839 .proto = iph->nexthdr,
840 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700841
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800842 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700843 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700844
Eric Dumazetadf30902009-06-02 05:19:30 +0000845 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700846}
847
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800848static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700849 struct flowi *fl, int flags)
850{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800851 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700852}
853
Daniel Lezcano4591db42008-03-05 10:48:10 -0800854struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
855 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700856{
857 int flags = 0;
858
Brian Haley6057fd72010-05-28 23:02:35 -0700859 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700860 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700861
Thomas Grafadaa70b2006-10-13 15:01:03 -0700862 if (!ipv6_addr_any(&fl->fl6_src))
863 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000864 else if (sk)
865 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700866
Daniel Lezcano4591db42008-03-05 10:48:10 -0800867 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868}
869
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900870EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
David S. Miller14e50e52007-05-24 18:17:54 -0700872int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
873{
874 struct rt6_info *ort = (struct rt6_info *) *dstp;
875 struct rt6_info *rt = (struct rt6_info *)
David S. Miller3c7bd1a2011-02-16 14:08:44 -0800876 dst_alloc(&ip6_dst_blackhole_ops, 1);
David S. Miller14e50e52007-05-24 18:17:54 -0700877 struct dst_entry *new = NULL;
878
879 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700880 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700881
David S. Miller14e50e52007-05-24 18:17:54 -0700882 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800883 new->input = dst_discard;
884 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700885
David S. Millerdefb3512010-12-08 21:16:57 -0800886 dst_copy_metrics(new, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -0700887 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700888 if (new->dev)
889 dev_hold(new->dev);
890 rt->rt6i_idev = ort->rt6i_idev;
891 if (rt->rt6i_idev)
892 in6_dev_hold(rt->rt6i_idev);
893 rt->rt6i_expires = 0;
894
895 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
896 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
897 rt->rt6i_metric = 0;
898
899 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
900#ifdef CONFIG_IPV6_SUBTREES
901 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
902#endif
903
904 dst_free(new);
905 }
906
907 dst_release(*dstp);
908 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000909 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700910}
911EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
912
/*
 *	Destination cache support functions
 */
916
917static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
918{
919 struct rt6_info *rt;
920
921 rt = (struct rt6_info *) dst;
922
David S. Miller6431cbc2011-02-07 20:38:06 -0800923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
928 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800930 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 return NULL;
932}
933
934static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
935{
936 struct rt6_info *rt = (struct rt6_info *) dst;
937
938 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000939 if (rt->rt6i_flags & RTF_CACHE) {
940 if (rt6_check_expired(rt)) {
941 ip6_del_rt(rt);
942 dst = NULL;
943 }
944 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000946 dst = NULL;
947 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000949 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950}
951
952static void ip6_link_failure(struct sk_buff *skb)
953{
954 struct rt6_info *rt;
955
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957
Eric Dumazetadf30902009-06-02 05:19:30 +0000958 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 if (rt) {
960 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700961 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 rt->rt6i_flags |= RTF_EXPIRES;
963 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
964 rt->rt6i_node->fn_sernum = -1;
965 }
966}
967
968static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
969{
970 struct rt6_info *rt6 = (struct rt6_info*)dst;
971
972 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
973 rt6->rt6i_flags |= RTF_MODIFIED;
974 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -0800975 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -0800977 features |= RTAX_FEATURE_ALLFRAG;
978 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 }
David S. Millerdefb3512010-12-08 21:16:57 -0800980 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 }
982}
983
David S. Miller0dbaee32010-12-13 12:52:14 -0800984static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985{
David S. Miller0dbaee32010-12-13 12:52:14 -0800986 struct net_device *dev = dst->dev;
987 unsigned int mtu = dst_mtu(dst);
988 struct net *net = dev_net(dev);
989
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
991
Daniel Lezcano55786892008-03-04 13:47:47 -0800992 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
993 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994
995 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900996 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
997 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
998 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 * rely only on pmtu discovery"
1000 */
1001 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1002 mtu = IPV6_MAXPLEN;
1003 return mtu;
1004}
1005
David S. Millerd33e4552010-12-14 13:01:14 -08001006static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1007{
1008 unsigned int mtu = IPV6_MIN_MTU;
1009 struct inet6_dev *idev;
1010
1011 rcu_read_lock();
1012 idev = __in6_dev_get(dst->dev);
1013 if (idev)
1014 mtu = idev->cnf.mtu6;
1015 rcu_read_unlock();
1016
1017 return mtu;
1018}
1019
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001020static struct dst_entry *icmp6_dst_gc_list;
1021static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001022
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001023struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001025 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026{
1027 struct rt6_info *rt;
1028 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001029 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
1031 if (unlikely(idev == NULL))
1032 return NULL;
1033
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001034 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035 if (unlikely(rt == NULL)) {
1036 in6_dev_put(idev);
1037 goto out;
1038 }
1039
1040 dev_hold(dev);
1041 if (neigh)
1042 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001043 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001045 if (IS_ERR(neigh))
1046 neigh = NULL;
1047 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048
1049 rt->rt6i_dev = dev;
1050 rt->rt6i_idev = idev;
1051 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -07001052 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001053 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Changli Gaod8d1f302010-06-10 23:31:35 -07001054 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055
1056#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -07001057 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001058 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 : 0;
1060 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1061 rt->rt6i_dst.plen = 128;
1062#endif
1063
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001064 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001065 rt->dst.next = icmp6_dst_gc_list;
1066 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001067 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
Daniel Lezcano55786892008-03-04 13:47:47 -08001069 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
1071out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001072 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073}
1074
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001075int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076{
1077 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001078 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079
1080 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001081
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001082 spin_lock_bh(&icmp6_dst_lock);
1083 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001084
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 while ((dst = *pprev) != NULL) {
1086 if (!atomic_read(&dst->__refcnt)) {
1087 *pprev = dst->next;
1088 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 } else {
1090 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001091 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 }
1093 }
1094
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001095 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001096
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001097 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098}
1099
David S. Miller1e493d12008-09-10 17:27:15 -07001100static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1101 void *arg)
1102{
1103 struct dst_entry *dst, **pprev;
1104
1105 spin_lock_bh(&icmp6_dst_lock);
1106 pprev = &icmp6_dst_gc_list;
1107 while ((dst = *pprev) != NULL) {
1108 struct rt6_info *rt = (struct rt6_info *) dst;
1109 if (func(rt, arg)) {
1110 *pprev = dst->next;
1111 dst_free(dst);
1112 } else {
1113 pprev = &dst->next;
1114 }
1115 }
1116 spin_unlock_bh(&icmp6_dst_lock);
1117}
1118
Daniel Lezcano569d3642008-01-18 03:56:57 -08001119static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001122 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001123 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1124 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1125 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1126 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1127 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001128 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
Eric Dumazetfc66f952010-10-08 06:37:34 +00001130 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001131 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001132 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 goto out;
1134
Benjamin Thery6891a342008-03-04 13:49:47 -08001135 net->ipv6.ip6_rt_gc_expire++;
1136 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1137 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001138 entries = dst_entries_get_slow(ops);
1139 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001140 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001142 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001143 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144}
1145
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only once everything works!
 */
1151
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001152int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153{
David S. Miller5170ae82010-12-12 21:35:57 -08001154 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001155 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001156 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001157 struct inet6_dev *idev;
1158
1159 rcu_read_lock();
1160 idev = __in6_dev_get(dev);
1161 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001162 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001163 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001164 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001165 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 }
1167 return hoplimit;
1168}
David S. Millerabbf46a2010-12-12 21:14:46 -08001169EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170
1171/*
1172 *
1173 */
1174
Thomas Graf86872cb2006-08-22 00:01:08 -07001175int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176{
1177 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001178 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 struct rt6_info *rt = NULL;
1180 struct net_device *dev = NULL;
1181 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001182 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 int addr_type;
1184
Thomas Graf86872cb2006-08-22 00:01:08 -07001185 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 return -EINVAL;
1187#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 return -EINVAL;
1190#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001191 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001193 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 if (!dev)
1195 goto out;
1196 idev = in6_dev_get(dev);
1197 if (!idev)
1198 goto out;
1199 }
1200
Thomas Graf86872cb2006-08-22 00:01:08 -07001201 if (cfg->fc_metric == 0)
1202 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Daniel Lezcano55786892008-03-04 13:47:47 -08001204 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001205 if (table == NULL) {
1206 err = -ENOBUFS;
1207 goto out;
1208 }
1209
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001210 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211
1212 if (rt == NULL) {
1213 err = -ENOMEM;
1214 goto out;
1215 }
1216
Changli Gaod8d1f302010-06-10 23:31:35 -07001217 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001218 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1219 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1220 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
Thomas Graf86872cb2006-08-22 00:01:08 -07001222 if (cfg->fc_protocol == RTPROT_UNSPEC)
1223 cfg->fc_protocol = RTPROT_BOOT;
1224 rt->rt6i_protocol = cfg->fc_protocol;
1225
1226 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
1228 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001229 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001230 else if (cfg->fc_flags & RTF_LOCAL)
1231 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001233 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Changli Gaod8d1f302010-06-10 23:31:35 -07001235 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236
Thomas Graf86872cb2006-08-22 00:01:08 -07001237 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1238 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001240 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241
1242#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001243 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1244 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245#endif
1246
Thomas Graf86872cb2006-08-22 00:01:08 -07001247 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248
1249 /* We cannot add true routes via loopback here,
1250 they would result in kernel looping; promote them to reject routes
1251 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001252 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001253 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1254 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001256 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 if (dev) {
1258 dev_put(dev);
1259 in6_dev_put(idev);
1260 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001261 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 dev_hold(dev);
1263 idev = in6_dev_get(dev);
1264 if (!idev) {
1265 err = -ENODEV;
1266 goto out;
1267 }
1268 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001269 rt->dst.output = ip6_pkt_discard_out;
1270 rt->dst.input = ip6_pkt_discard;
1271 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1273 goto install_route;
1274 }
1275
Thomas Graf86872cb2006-08-22 00:01:08 -07001276 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 struct in6_addr *gw_addr;
1278 int gwa_type;
1279
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 gw_addr = &cfg->fc_gateway;
1281 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 gwa_type = ipv6_addr_type(gw_addr);
1283
1284 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1285 struct rt6_info *grt;
1286
1287 /* IPv6 strictly inhibits using not link-local
1288 addresses as nexthop address.
1289 Otherwise, router will not able to send redirects.
1290 It is very good, but in some (rare!) circumstances
1291 (SIT, PtP, NBMA NOARP links) it is handy to allow
1292 some exceptions. --ANK
1293 */
1294 err = -EINVAL;
1295 if (!(gwa_type&IPV6_ADDR_UNICAST))
1296 goto out;
1297
Daniel Lezcano55786892008-03-04 13:47:47 -08001298 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
1300 err = -EHOSTUNREACH;
1301 if (grt == NULL)
1302 goto out;
1303 if (dev) {
1304 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001305 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 goto out;
1307 }
1308 } else {
1309 dev = grt->rt6i_dev;
1310 idev = grt->rt6i_idev;
1311 dev_hold(dev);
1312 in6_dev_hold(grt->rt6i_idev);
1313 }
1314 if (!(grt->rt6i_flags&RTF_GATEWAY))
1315 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001316 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317
1318 if (err)
1319 goto out;
1320 }
1321 err = -EINVAL;
1322 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1323 goto out;
1324 }
1325
1326 err = -ENODEV;
1327 if (dev == NULL)
1328 goto out;
1329
Thomas Graf86872cb2006-08-22 00:01:08 -07001330 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1332 if (IS_ERR(rt->rt6i_nexthop)) {
1333 err = PTR_ERR(rt->rt6i_nexthop);
1334 rt->rt6i_nexthop = NULL;
1335 goto out;
1336 }
1337 }
1338
Thomas Graf86872cb2006-08-22 00:01:08 -07001339 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340
1341install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001342 if (cfg->fc_mx) {
1343 struct nlattr *nla;
1344 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345
Thomas Graf86872cb2006-08-22 00:01:08 -07001346 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001347 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001348
1349 if (type) {
1350 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 err = -EINVAL;
1352 goto out;
1353 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001354
David S. Millerdefb3512010-12-08 21:16:57 -08001355 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 }
1358 }
1359
Changli Gaod8d1f302010-06-10 23:31:35 -07001360 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001362 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001363
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001364 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001365
Thomas Graf86872cb2006-08-22 00:01:08 -07001366 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
1368out:
1369 if (dev)
1370 dev_put(dev);
1371 if (idev)
1372 in6_dev_put(idev);
1373 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001374 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 return err;
1376}
1377
Thomas Graf86872cb2006-08-22 00:01:08 -07001378static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379{
1380 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001381 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001382 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001384 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001385 return -ENOENT;
1386
Thomas Grafc71099a2006-08-04 23:20:06 -07001387 table = rt->rt6i_table;
1388 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389
Thomas Graf86872cb2006-08-22 00:01:08 -07001390 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001391 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392
Thomas Grafc71099a2006-08-04 23:20:06 -07001393 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394
1395 return err;
1396}
1397
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001398int ip6_del_rt(struct rt6_info *rt)
1399{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001400 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001401 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001402 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001403 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001404}
1405
Thomas Graf86872cb2006-08-22 00:01:08 -07001406static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407{
Thomas Grafc71099a2006-08-04 23:20:06 -07001408 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 struct fib6_node *fn;
1410 struct rt6_info *rt;
1411 int err = -ESRCH;
1412
Daniel Lezcano55786892008-03-04 13:47:47 -08001413 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001414 if (table == NULL)
1415 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416
Thomas Grafc71099a2006-08-04 23:20:06 -07001417 read_lock_bh(&table->tb6_lock);
1418
1419 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001420 &cfg->fc_dst, cfg->fc_dst_len,
1421 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001422
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001424 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001425 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001427 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001429 if (cfg->fc_flags & RTF_GATEWAY &&
1430 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001432 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001434 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001435 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
Thomas Graf86872cb2006-08-22 00:01:08 -07001437 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 }
1439 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001440 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
1442 return err;
1443}
1444
1445/*
1446 * Handle redirects
1447 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001448struct ip6rd_flowi {
1449 struct flowi fl;
1450 struct in6_addr gateway;
1451};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001453static struct rt6_info *__ip6_route_redirect(struct net *net,
1454 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001455 struct flowi *fl,
1456 int flags)
1457{
1458 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1459 struct rt6_info *rt;
1460 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001461
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001463 * Get the "current" route for this destination and
1464 * check if the redirect has come from approriate router.
1465 *
1466 * RFC 2461 specifies that redirects should only be
1467 * accepted if they come from the nexthop to the target.
1468 * Due to the way the routes are chosen, this notion
1469 * is a bit fuzzy and one might need to check all possible
1470 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
Thomas Grafc71099a2006-08-04 23:20:06 -07001473 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001474 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001475restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001476 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001477 /*
1478 * Current route is on-link; redirect is always invalid.
1479 *
1480 * Seems, previous statement is not true. It could
1481 * be node, which looks for us as on-link (f.e. proxy ndisc)
1482 * But then router serving it might decide, that we should
1483 * know truth 8)8) --ANK (980726).
1484 */
1485 if (rt6_check_expired(rt))
1486 continue;
1487 if (!(rt->rt6i_flags & RTF_GATEWAY))
1488 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001489 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001490 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001491 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001492 continue;
1493 break;
1494 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001495
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001496 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001497 rt = net->ipv6.ip6_null_entry;
1498 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001499out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001500 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001501
1502 read_unlock_bh(&table->tb6_lock);
1503
1504 return rt;
1505};
1506
1507static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1508 struct in6_addr *src,
1509 struct in6_addr *gateway,
1510 struct net_device *dev)
1511{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001512 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001513 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001514 struct ip6rd_flowi rdfl = {
1515 .fl = {
1516 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001517 .fl6_dst = *dest,
1518 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001519 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001520 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001521
Brian Haley86c36ce2009-10-07 13:58:01 -07001522 ipv6_addr_copy(&rdfl.gateway, gateway);
1523
Thomas Grafadaa70b2006-10-13 15:01:03 -07001524 if (rt6_need_strict(dest))
1525 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001526
Daniel Lezcano55786892008-03-04 13:47:47 -08001527 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001528 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001529}
1530
1531void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1532 struct in6_addr *saddr,
1533 struct neighbour *neigh, u8 *lladdr, int on_link)
1534{
1535 struct rt6_info *rt, *nrt = NULL;
1536 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001537 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001538
1539 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1540
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001541 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 if (net_ratelimit())
1543 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1544 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001545 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 }
1547
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 /*
1549 * We have finally decided to accept it.
1550 */
1551
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001552 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1554 NEIGH_UPDATE_F_OVERRIDE|
1555 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1556 NEIGH_UPDATE_F_ISROUTER))
1557 );
1558
1559 /*
1560 * Redirect received -> path was valid.
1561 * Look, redirects are sent only in response to data packets,
1562 * so that this nexthop apparently is reachable. --ANK
1563 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001564 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565
1566 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001567 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 goto out;
1569
1570 nrt = ip6_rt_copy(rt);
1571 if (nrt == NULL)
1572 goto out;
1573
1574 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1575 if (on_link)
1576 nrt->rt6i_flags &= ~RTF_GATEWAY;
1577
1578 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1579 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001580 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581
1582 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1583 nrt->rt6i_nexthop = neigh_clone(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584
Thomas Graf40e22e82006-08-22 00:00:45 -07001585 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 goto out;
1587
Changli Gaod8d1f302010-06-10 23:31:35 -07001588 netevent.old = &rt->dst;
1589 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001590 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1591
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001593 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 return;
1595 }
1596
1597out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001598 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599}
1600
1601/*
1602 * Handle ICMP "packet too big" messages
1603 * i.e. Path MTU discovery
1604 */
1605
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001606static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1607 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608{
1609 struct rt6_info *rt, *nrt;
1610 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001611again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001612 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 if (rt == NULL)
1614 return;
1615
Andrey Vagind3052b52010-12-11 15:20:11 +00001616 if (rt6_check_expired(rt)) {
1617 ip6_del_rt(rt);
1618 goto again;
1619 }
1620
Changli Gaod8d1f302010-06-10 23:31:35 -07001621 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622 goto out;
1623
1624 if (pmtu < IPV6_MIN_MTU) {
1625 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001626 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 * MTU (1280) and a fragment header should always be included
1628 * after a node receiving Too Big message reporting PMTU is
1629 * less than the IPv6 Minimum Link MTU.
1630 */
1631 pmtu = IPV6_MIN_MTU;
1632 allfrag = 1;
1633 }
1634
1635 /* New mtu received -> path was valid.
1636 They are sent only in response to data packets,
1637 so that this nexthop apparently is reachable. --ANK
1638 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001639 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640
1641 /* Host route. If it is static, it would be better
1642 not to override it, but add new one, so that
1643 when cache entry will expire old pmtu
1644 would return automatically.
1645 */
1646 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001647 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1648 if (allfrag) {
1649 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1650 features |= RTAX_FEATURE_ALLFRAG;
1651 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1652 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001653 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1655 goto out;
1656 }
1657
1658 /* Network route.
1659 Two cases are possible:
1660 1. It is connected route. Action: COW
1661 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1662 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001663 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001664 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001665 else
1666 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001667
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001668 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001669 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1670 if (allfrag) {
1671 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1672 features |= RTAX_FEATURE_ALLFRAG;
1673 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1674 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001675
1676 /* According to RFC 1981, detecting PMTU increase shouldn't be
1677 * happened within 5 mins, the recommended timer is 10 mins.
1678 * Here this route expiration time is set to ip6_rt_mtu_expires
1679 * which is 10 mins. After 10 mins the decreased pmtu is expired
1680 * and detecting PMTU increase will be automatically happened.
1681 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001682 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001683 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1684
Thomas Graf40e22e82006-08-22 00:00:45 -07001685 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001688 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689}
1690
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001691void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1692 struct net_device *dev, u32 pmtu)
1693{
1694 struct net *net = dev_net(dev);
1695
1696 /*
1697 * RFC 1981 states that a node "MUST reduce the size of the packets it
1698 * is sending along the path" that caused the Packet Too Big message.
1699 * Since it's not possible in the general case to determine which
1700 * interface was used to send the original packet, we update the MTU
1701 * on the interface that will be used to send future packets. We also
1702 * update the MTU on the interface that received the Packet Too Big in
1703 * case the original packet was forced out that interface with
1704 * SO_BINDTODEVICE or similar. This is the next best thing to the
1705 * correct behaviour, which would be to update the MTU on all
1706 * interfaces.
1707 */
1708 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1709 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1710}
1711
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712/*
1713 * Misc support functions
1714 */
1715
1716static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1717{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001718 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001719 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720
1721 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001722 rt->dst.input = ort->dst.input;
1723 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724
David S. Millerdefb3512010-12-08 21:16:57 -08001725 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001726 rt->dst.error = ort->dst.error;
1727 rt->dst.dev = ort->dst.dev;
1728 if (rt->dst.dev)
1729 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 rt->rt6i_idev = ort->rt6i_idev;
1731 if (rt->rt6i_idev)
1732 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001733 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 rt->rt6i_expires = 0;
1735
1736 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1737 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1738 rt->rt6i_metric = 0;
1739
1740 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1741#ifdef CONFIG_IPV6_SUBTREES
1742 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1743#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001744 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 }
1746 return rt;
1747}
1748
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001749#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001750static struct rt6_info *rt6_get_route_info(struct net *net,
1751 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001752 struct in6_addr *gwaddr, int ifindex)
1753{
1754 struct fib6_node *fn;
1755 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001756 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001757
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001758 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001759 if (table == NULL)
1760 return NULL;
1761
1762 write_lock_bh(&table->tb6_lock);
1763 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001764 if (!fn)
1765 goto out;
1766
Changli Gaod8d1f302010-06-10 23:31:35 -07001767 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001768 if (rt->rt6i_dev->ifindex != ifindex)
1769 continue;
1770 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1771 continue;
1772 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1773 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001774 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001775 break;
1776 }
1777out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001778 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001779 return rt;
1780}
1781
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001782static struct rt6_info *rt6_add_route_info(struct net *net,
1783 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001784 struct in6_addr *gwaddr, int ifindex,
1785 unsigned pref)
1786{
Thomas Graf86872cb2006-08-22 00:01:08 -07001787 struct fib6_config cfg = {
1788 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001789 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001790 .fc_ifindex = ifindex,
1791 .fc_dst_len = prefixlen,
1792 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1793 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001794 .fc_nlinfo.pid = 0,
1795 .fc_nlinfo.nlh = NULL,
1796 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001797 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001798
Thomas Graf86872cb2006-08-22 00:01:08 -07001799 ipv6_addr_copy(&cfg.fc_dst, prefix);
1800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1801
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001802 /* We should treat it as a default route if prefix length is 0. */
1803 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001804 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001805
Thomas Graf86872cb2006-08-22 00:01:08 -07001806 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001807
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001808 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001809}
1810#endif
1811
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001813{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001815 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001817 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001818 if (table == NULL)
1819 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
Thomas Grafc71099a2006-08-04 23:20:06 -07001821 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001822 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001824 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1826 break;
1827 }
1828 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001829 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001830 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 return rt;
1832}
1833
1834struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001835 struct net_device *dev,
1836 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837{
Thomas Graf86872cb2006-08-22 00:01:08 -07001838 struct fib6_config cfg = {
1839 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001840 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001841 .fc_ifindex = dev->ifindex,
1842 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1843 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001844 .fc_nlinfo.pid = 0,
1845 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001846 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001847 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848
Thomas Graf86872cb2006-08-22 00:01:08 -07001849 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853 return rt6_get_dflt_router(gwaddr, dev);
1854}
1855
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001856void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857{
1858 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 struct fib6_table *table;
1860
1861 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001862 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001863 if (table == NULL)
1864 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865
1866restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001867 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001868 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001870 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001871 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001872 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 goto restart;
1874 }
1875 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001876 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877}
1878
Daniel Lezcano55786892008-03-04 13:47:47 -08001879static void rtmsg_to_fib6_config(struct net *net,
1880 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001881 struct fib6_config *cfg)
1882{
1883 memset(cfg, 0, sizeof(*cfg));
1884
1885 cfg->fc_table = RT6_TABLE_MAIN;
1886 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1887 cfg->fc_metric = rtmsg->rtmsg_metric;
1888 cfg->fc_expires = rtmsg->rtmsg_info;
1889 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1890 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1891 cfg->fc_flags = rtmsg->rtmsg_flags;
1892
Daniel Lezcano55786892008-03-04 13:47:47 -08001893 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001894
Thomas Graf86872cb2006-08-22 00:01:08 -07001895 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1896 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1897 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1898}
1899
Daniel Lezcano55786892008-03-04 13:47:47 -08001900int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901{
Thomas Graf86872cb2006-08-22 00:01:08 -07001902 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 struct in6_rtmsg rtmsg;
1904 int err;
1905
1906 switch(cmd) {
1907 case SIOCADDRT: /* Add a route */
1908 case SIOCDELRT: /* Delete a route */
1909 if (!capable(CAP_NET_ADMIN))
1910 return -EPERM;
1911 err = copy_from_user(&rtmsg, arg,
1912 sizeof(struct in6_rtmsg));
1913 if (err)
1914 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001915
Daniel Lezcano55786892008-03-04 13:47:47 -08001916 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001917
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 rtnl_lock();
1919 switch (cmd) {
1920 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001921 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 break;
1923 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001924 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925 break;
1926 default:
1927 err = -EINVAL;
1928 }
1929 rtnl_unlock();
1930
1931 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001932 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933
1934 return -EINVAL;
1935}
1936
1937/*
1938 * Drop the packet on the floor
1939 */
1940
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001941static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001943 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001944 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001945 switch (ipstats_mib_noroutes) {
1946 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001947 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001948 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001949 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1950 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001951 break;
1952 }
1953 /* FALLTHROUGH */
1954 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001955 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1956 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001957 break;
1958 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001959 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 kfree_skb(skb);
1961 return 0;
1962}
1963
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001964static int ip6_pkt_discard(struct sk_buff *skb)
1965{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001966 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001967}
1968
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001969static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970{
Eric Dumazetadf30902009-06-02 05:19:30 +00001971 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001972 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973}
1974
David S. Miller6723ab52006-10-18 21:20:57 -07001975#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1976
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001977static int ip6_pkt_prohibit(struct sk_buff *skb)
1978{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001979 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001980}
1981
1982static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1983{
Eric Dumazetadf30902009-06-02 05:19:30 +00001984 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001985 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001986}
1987
David S. Miller6723ab52006-10-18 21:20:57 -07001988#endif
1989
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990/*
1991 * Allocate a dst for local (unicast / anycast) address.
1992 */
1993
1994struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1995 const struct in6_addr *addr,
1996 int anycast)
1997{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001998 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001999 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08002000 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001
Ben Greear40385652010-11-08 12:33:48 +00002002 if (rt == NULL) {
2003 if (net_ratelimit())
2004 pr_warning("IPv6: Maximum number of routes reached,"
2005 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002007 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008
Daniel Lezcano55786892008-03-04 13:47:47 -08002009 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 in6_dev_hold(idev);
2011
Changli Gaod8d1f302010-06-10 23:31:35 -07002012 rt->dst.flags = DST_HOST;
2013 rt->dst.input = ip6_input;
2014 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08002015 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 rt->rt6i_idev = idev;
David S. Millerdefb3512010-12-08 21:16:57 -08002017 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
Changli Gaod8d1f302010-06-10 23:31:35 -07002018 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019
2020 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002021 if (anycast)
2022 rt->rt6i_flags |= RTF_ANYCAST;
2023 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002025 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2026 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002027 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002028
2029 /* We are casting this because that is the return
2030 * value type. But an errno encoded pointer is the
2031 * same regardless of the underlying pointer type,
2032 * and that's what we are returning. So this is OK.
2033 */
2034 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 }
David S. Miller14deae42009-01-04 16:04:39 -08002036 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037
2038 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2039 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002040 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041
Changli Gaod8d1f302010-06-10 23:31:35 -07002042 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043
2044 return rt;
2045}
2046
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002047struct arg_dev_net {
2048 struct net_device *dev;
2049 struct net *net;
2050};
2051
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052static int fib6_ifdown(struct rt6_info *rt, void *arg)
2053{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002054 const struct arg_dev_net *adn = arg;
2055 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002056
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002057 if ((rt->rt6i_dev == dev || dev == NULL) &&
2058 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 RT6_TRACE("deleted by ifdown %p\n", rt);
2060 return -1;
2061 }
2062 return 0;
2063}
2064
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002065void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002067 struct arg_dev_net adn = {
2068 .dev = dev,
2069 .net = net,
2070 };
2071
2072 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002073 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074}
2075
/*
 * Cookie passed to rt6_mtu_change_route() through the walker's void *
 * argument.
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
2081
2082static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2083{
2084 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2085 struct inet6_dev *idev;
2086
2087 /* In IPv6 pmtu discovery is not optional,
2088 so that RTAX_MTU lock cannot disable it.
2089 We still use this lock to block changes
2090 caused by addrconf/ndisc.
2091 */
2092
2093 idev = __in6_dev_get(arg->dev);
2094 if (idev == NULL)
2095 return 0;
2096
2097 /* For administrative MTU increase, there is no way to discover
2098 IPv6 PMTU increase, so PMTU increase should be updated here.
2099 Since RFC 1981 doesn't include administrative MTU increase
2100 update PMTU increase is a MUST. (i.e. jumbo frame)
2101 */
2102 /*
2103 If new MTU is less than route PMTU, this new MTU will be the
2104 lowest MTU in the path, update the route PMTU to reflect PMTU
2105 decreases; if new MTU is greater than route PMTU, and the
2106 old MTU is the lowest MTU in the path, update the route PMTU
2107 to reflect the increase. In this case if the other nodes' MTU
2108 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2109 PMTU discouvery.
2110 */
2111 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002112 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2113 (dst_mtu(&rt->dst) >= arg->mtu ||
2114 (dst_mtu(&rt->dst) < arg->mtu &&
2115 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002116 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002117 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 return 0;
2119}
2120
2121void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2122{
Thomas Grafc71099a2006-08-04 23:20:06 -07002123 struct rt6_mtu_change_arg arg = {
2124 .dev = dev,
2125 .mtu = mtu,
2126 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002128 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129}
2130
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002131static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002132 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002133 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002134 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002135 [RTA_PRIORITY] = { .type = NLA_U32 },
2136 [RTA_METRICS] = { .type = NLA_NESTED },
2137};
2138
2139static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2140 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141{
Thomas Graf86872cb2006-08-22 00:01:08 -07002142 struct rtmsg *rtm;
2143 struct nlattr *tb[RTA_MAX+1];
2144 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145
Thomas Graf86872cb2006-08-22 00:01:08 -07002146 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2147 if (err < 0)
2148 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149
Thomas Graf86872cb2006-08-22 00:01:08 -07002150 err = -EINVAL;
2151 rtm = nlmsg_data(nlh);
2152 memset(cfg, 0, sizeof(*cfg));
2153
2154 cfg->fc_table = rtm->rtm_table;
2155 cfg->fc_dst_len = rtm->rtm_dst_len;
2156 cfg->fc_src_len = rtm->rtm_src_len;
2157 cfg->fc_flags = RTF_UP;
2158 cfg->fc_protocol = rtm->rtm_protocol;
2159
2160 if (rtm->rtm_type == RTN_UNREACHABLE)
2161 cfg->fc_flags |= RTF_REJECT;
2162
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002163 if (rtm->rtm_type == RTN_LOCAL)
2164 cfg->fc_flags |= RTF_LOCAL;
2165
Thomas Graf86872cb2006-08-22 00:01:08 -07002166 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2167 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002168 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002169
2170 if (tb[RTA_GATEWAY]) {
2171 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2172 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002174
2175 if (tb[RTA_DST]) {
2176 int plen = (rtm->rtm_dst_len + 7) >> 3;
2177
2178 if (nla_len(tb[RTA_DST]) < plen)
2179 goto errout;
2180
2181 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002183
2184 if (tb[RTA_SRC]) {
2185 int plen = (rtm->rtm_src_len + 7) >> 3;
2186
2187 if (nla_len(tb[RTA_SRC]) < plen)
2188 goto errout;
2189
2190 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002192
2193 if (tb[RTA_OIF])
2194 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2195
2196 if (tb[RTA_PRIORITY])
2197 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2198
2199 if (tb[RTA_METRICS]) {
2200 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2201 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002203
2204 if (tb[RTA_TABLE])
2205 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2206
2207 err = 0;
2208errout:
2209 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210}
2211
Thomas Grafc127ea22007-03-22 11:58:32 -07002212static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213{
Thomas Graf86872cb2006-08-22 00:01:08 -07002214 struct fib6_config cfg;
2215 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216
Thomas Graf86872cb2006-08-22 00:01:08 -07002217 err = rtm_to_fib6_config(skb, nlh, &cfg);
2218 if (err < 0)
2219 return err;
2220
2221 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222}
2223
Thomas Grafc127ea22007-03-22 11:58:32 -07002224static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225{
Thomas Graf86872cb2006-08-22 00:01:08 -07002226 struct fib6_config cfg;
2227 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228
Thomas Graf86872cb2006-08-22 00:01:08 -07002229 err = rtm_to_fib6_config(skb, nlh, &cfg);
2230 if (err < 0)
2231 return err;
2232
2233 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234}
2235
Thomas Graf339bf982006-11-10 14:10:15 -08002236static inline size_t rt6_nlmsg_size(void)
2237{
2238 return NLMSG_ALIGN(sizeof(struct rtmsg))
2239 + nla_total_size(16) /* RTA_SRC */
2240 + nla_total_size(16) /* RTA_DST */
2241 + nla_total_size(16) /* RTA_GATEWAY */
2242 + nla_total_size(16) /* RTA_PREFSRC */
2243 + nla_total_size(4) /* RTA_TABLE */
2244 + nla_total_size(4) /* RTA_IIF */
2245 + nla_total_size(4) /* RTA_OIF */
2246 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002247 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002248 + nla_total_size(sizeof(struct rta_cacheinfo));
2249}
2250
Brian Haley191cd582008-08-14 15:33:21 -07002251static int rt6_fill_node(struct net *net,
2252 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002253 struct in6_addr *dst, struct in6_addr *src,
2254 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002255 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256{
2257 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002258 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002259 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002260 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261
2262 if (prefix) { /* user wants prefix routes only */
2263 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2264 /* success since this is not a prefix route */
2265 return 1;
2266 }
2267 }
2268
Thomas Graf2d7202b2006-08-22 00:01:27 -07002269 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2270 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002271 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002272
2273 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 rtm->rtm_family = AF_INET6;
2275 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2276 rtm->rtm_src_len = rt->rt6i_src.plen;
2277 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002278 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002279 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002280 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002281 table = RT6_TABLE_UNSPEC;
2282 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002283 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 if (rt->rt6i_flags&RTF_REJECT)
2285 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002286 else if (rt->rt6i_flags&RTF_LOCAL)
2287 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2289 rtm->rtm_type = RTN_LOCAL;
2290 else
2291 rtm->rtm_type = RTN_UNICAST;
2292 rtm->rtm_flags = 0;
2293 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2294 rtm->rtm_protocol = rt->rt6i_protocol;
2295 if (rt->rt6i_flags&RTF_DYNAMIC)
2296 rtm->rtm_protocol = RTPROT_REDIRECT;
2297 else if (rt->rt6i_flags & RTF_ADDRCONF)
2298 rtm->rtm_protocol = RTPROT_KERNEL;
2299 else if (rt->rt6i_flags&RTF_DEFAULT)
2300 rtm->rtm_protocol = RTPROT_RA;
2301
2302 if (rt->rt6i_flags&RTF_CACHE)
2303 rtm->rtm_flags |= RTM_F_CLONED;
2304
2305 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002306 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002307 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002309 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310#ifdef CONFIG_IPV6_SUBTREES
2311 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002312 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002313 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002315 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002317 if (iif) {
2318#ifdef CONFIG_IPV6_MROUTE
2319 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002320 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002321 if (err <= 0) {
2322 if (!nowait) {
2323 if (err == 0)
2324 return 0;
2325 goto nla_put_failure;
2326 } else {
2327 if (err == -EMSGSIZE)
2328 goto nla_put_failure;
2329 }
2330 }
2331 } else
2332#endif
2333 NLA_PUT_U32(skb, RTA_IIF, iif);
2334 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002335 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002337 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002338 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002339 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002341
David S. Millerdefb3512010-12-08 21:16:57 -08002342 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002343 goto nla_put_failure;
2344
Changli Gaod8d1f302010-06-10 23:31:35 -07002345 if (rt->dst.neighbour)
2346 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002347
Changli Gaod8d1f302010-06-10 23:31:35 -07002348 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002349 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2350
2351 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002352
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002353 if (!(rt->rt6i_flags & RTF_EXPIRES))
2354 expires = 0;
2355 else if (rt->rt6i_expires - jiffies < INT_MAX)
2356 expires = rt->rt6i_expires - jiffies;
2357 else
2358 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002359
Changli Gaod8d1f302010-06-10 23:31:35 -07002360 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2361 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002362 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363
Thomas Graf2d7202b2006-08-22 00:01:27 -07002364 return nlmsg_end(skb, nlh);
2365
2366nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002367 nlmsg_cancel(skb, nlh);
2368 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369}
2370
Patrick McHardy1b43af52006-08-10 23:11:17 -07002371int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372{
2373 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2374 int prefix;
2375
Thomas Graf2d7202b2006-08-22 00:01:27 -07002376 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2377 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2379 } else
2380 prefix = 0;
2381
Brian Haley191cd582008-08-14 15:33:21 -07002382 return rt6_fill_node(arg->net,
2383 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002385 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386}
2387
Thomas Grafc127ea22007-03-22 11:58:32 -07002388static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002390 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002391 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002393 struct sk_buff *skb;
2394 struct rtmsg *rtm;
2395 struct flowi fl;
2396 int err, iif = 0;
2397
2398 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2399 if (err < 0)
2400 goto errout;
2401
2402 err = -EINVAL;
2403 memset(&fl, 0, sizeof(fl));
2404
2405 if (tb[RTA_SRC]) {
2406 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2407 goto errout;
2408
2409 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2410 }
2411
2412 if (tb[RTA_DST]) {
2413 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2414 goto errout;
2415
2416 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2417 }
2418
2419 if (tb[RTA_IIF])
2420 iif = nla_get_u32(tb[RTA_IIF]);
2421
2422 if (tb[RTA_OIF])
2423 fl.oif = nla_get_u32(tb[RTA_OIF]);
2424
2425 if (iif) {
2426 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002427 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002428 if (!dev) {
2429 err = -ENODEV;
2430 goto errout;
2431 }
2432 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433
2434 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002435 if (skb == NULL) {
2436 err = -ENOBUFS;
2437 goto errout;
2438 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439
2440 /* Reserve room for dummy headers, this skb can pass
2441 through good chunk of routing engine.
2442 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002443 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2445
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002446 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002447 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448
Brian Haley191cd582008-08-14 15:33:21 -07002449 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002451 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002453 kfree_skb(skb);
2454 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 }
2456
Daniel Lezcano55786892008-03-04 13:47:47 -08002457 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002458errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460}
2461
Thomas Graf86872cb2006-08-22 00:01:08 -07002462void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463{
2464 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002465 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002466 u32 seq;
2467 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002469 err = -ENOBUFS;
2470 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002471
Thomas Graf339bf982006-11-10 14:10:15 -08002472 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002473 if (skb == NULL)
2474 goto errout;
2475
Brian Haley191cd582008-08-14 15:33:21 -07002476 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002477 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002478 if (err < 0) {
2479 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2480 WARN_ON(err == -EMSGSIZE);
2481 kfree_skb(skb);
2482 goto errout;
2483 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002484 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2485 info->nlh, gfp_any());
2486 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002487errout:
2488 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002489 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490}
2491
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002492static int ip6_route_dev_notify(struct notifier_block *this,
2493 unsigned long event, void *data)
2494{
2495 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002496 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002497
2498 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002499 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002500 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2501#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002502 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002503 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002504 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002505 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2506#endif
2507 }
2508
2509 return NOTIFY_OK;
2510}
2511
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512/*
2513 * /proc
2514 */
2515
2516#ifdef CONFIG_PROC_FS
2517
/*
 * Buffer-window bookkeeping for /proc output.
 * NOTE(review): nothing in the nearby seq_file based code references
 * this structure -- check for users elsewhere before relying on it.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2526
2527static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2528{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002529 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002531 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532
2533#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002534 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002536 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537#endif
2538
2539 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002540 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002542 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002544 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002545 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2546 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002547 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 return 0;
2549}
2550
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002551static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002553 struct net *net = (struct net *)m->private;
2554 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002555 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556}
2557
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002558static int ipv6_route_open(struct inode *inode, struct file *file)
2559{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002560 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002561}
2562
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002563static const struct file_operations ipv6_route_proc_fops = {
2564 .owner = THIS_MODULE,
2565 .open = ipv6_route_open,
2566 .read = seq_read,
2567 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002568 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002569};
2570
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2572{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002573 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002575 net->ipv6.rt6_stats->fib_nodes,
2576 net->ipv6.rt6_stats->fib_route_nodes,
2577 net->ipv6.rt6_stats->fib_rt_alloc,
2578 net->ipv6.rt6_stats->fib_rt_entries,
2579 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002580 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002581 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582
2583 return 0;
2584}
2585
2586static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2587{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002588 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002589}
2590
Arjan van de Ven9a321442007-02-12 00:55:35 -08002591static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592 .owner = THIS_MODULE,
2593 .open = rt6_stats_seq_open,
2594 .read = seq_read,
2595 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002596 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597};
2598#endif /* CONFIG_PROC_FS */
2599
2600#ifdef CONFIG_SYSCTL
2601
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002603int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002604 void __user *buffer, size_t *lenp, loff_t *ppos)
2605{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002606 struct net *net = current->nsproxy->net_ns;
2607 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002609 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002610 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 return 0;
2612 } else
2613 return -EINVAL;
2614}
2615
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002616ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002617 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002619 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620 .maxlen = sizeof(int),
Dave Jones89c8b3a2005-04-28 12:11:49 -07002621 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002622 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 },
2624 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002626 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627 .maxlen = sizeof(int),
2628 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002629 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 },
2631 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002633 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634 .maxlen = sizeof(int),
2635 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002636 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 },
2638 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002640 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 .maxlen = sizeof(int),
2642 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002643 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644 },
2645 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002646 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002647 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648 .maxlen = sizeof(int),
2649 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002650 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651 },
2652 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002654 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655 .maxlen = sizeof(int),
2656 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002657 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658 },
2659 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002661 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662 .maxlen = sizeof(int),
2663 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002664 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665 },
2666 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002667 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002668 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669 .maxlen = sizeof(int),
2670 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002671 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672 },
2673 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002675 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002676 .maxlen = sizeof(int),
2677 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002678 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679 },
2680 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002682 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002683 .maxlen = sizeof(int),
2684 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002685 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002686 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002687 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002688};
2689
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002690struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002691{
2692 struct ctl_table *table;
2693
2694 table = kmemdup(ipv6_route_table_template,
2695 sizeof(ipv6_route_table_template),
2696 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002697
2698 if (table) {
2699 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002700 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002701 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2702 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2703 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2704 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2705 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2706 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2707 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002708 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002709 }
2710
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002711 return table;
2712}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002713#endif
2714
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002715static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002716{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002717 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002718
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002719 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2720 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002721
Eric Dumazetfc66f952010-10-08 06:37:34 +00002722 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2723 goto out_ip6_dst_ops;
2724
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002725 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2726 sizeof(*net->ipv6.ip6_null_entry),
2727 GFP_KERNEL);
2728 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002729 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002730 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002731 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002732 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002733 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2734 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002735
2736#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2737 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2738 sizeof(*net->ipv6.ip6_prohibit_entry),
2739 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002740 if (!net->ipv6.ip6_prohibit_entry)
2741 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002742 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002743 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002744 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002745 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2746 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002747
2748 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2749 sizeof(*net->ipv6.ip6_blk_hole_entry),
2750 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002751 if (!net->ipv6.ip6_blk_hole_entry)
2752 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002753 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002754 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002755 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002756 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2757 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002758#endif
2759
Peter Zijlstrab339a472008-10-07 14:15:00 -07002760 net->ipv6.sysctl.flush_delay = 0;
2761 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2762 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2763 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2764 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2765 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2766 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2767 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2768
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002769#ifdef CONFIG_PROC_FS
2770 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2771 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2772#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002773 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2774
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002775 ret = 0;
2776out:
2777 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002778
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002779#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2780out_ip6_prohibit_entry:
2781 kfree(net->ipv6.ip6_prohibit_entry);
2782out_ip6_null_entry:
2783 kfree(net->ipv6.ip6_null_entry);
2784#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002785out_ip6_dst_entries:
2786 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002787out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002788 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002789}
2790
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002791static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002792{
2793#ifdef CONFIG_PROC_FS
2794 proc_net_remove(net, "ipv6_route");
2795 proc_net_remove(net, "rt6_stats");
2796#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002797 kfree(net->ipv6.ip6_null_entry);
2798#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2799 kfree(net->ipv6.ip6_prohibit_entry);
2800 kfree(net->ipv6.ip6_blk_hole_entry);
2801#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002802 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002803}
2804
2805static struct pernet_operations ip6_route_net_ops = {
2806 .init = ip6_route_net_init,
2807 .exit = ip6_route_net_exit,
2808};
2809
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002810static struct notifier_block ip6_route_dev_notifier = {
2811 .notifier_call = ip6_route_dev_notify,
2812 .priority = 0,
2813};
2814
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002815int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002817 int ret;
2818
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002819 ret = -ENOMEM;
2820 ip6_dst_ops_template.kmem_cachep =
2821 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2822 SLAB_HWCACHE_ALIGN, NULL);
2823 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002824 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002825
Eric Dumazetfc66f952010-10-08 06:37:34 +00002826 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002827 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002828 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002829
Eric Dumazetfc66f952010-10-08 06:37:34 +00002830 ret = register_pernet_subsys(&ip6_route_net_ops);
2831 if (ret)
2832 goto out_dst_entries;
2833
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002834 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2835
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002836 /* Registering of the loopback is done before this portion of code,
2837 * the loopback reference in rt6_info will not be taken, do it
2838 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002839 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002840 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2841 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002842 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002843 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002844 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002845 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2846 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002847 ret = fib6_init();
2848 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002849 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002850
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002851 ret = xfrm6_init();
2852 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002853 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002854
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002855 ret = fib6_rules_init();
2856 if (ret)
2857 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002858
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002859 ret = -ENOBUFS;
2860 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2861 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2862 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2863 goto fib6_rules_init;
2864
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002865 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002866 if (ret)
2867 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002868
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002869out:
2870 return ret;
2871
2872fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002873 fib6_rules_cleanup();
2874xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002875 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002876out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002877 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002878out_register_subsys:
2879 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002880out_dst_entries:
2881 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002882out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002883 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002884 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002885}
2886
2887void ip6_route_cleanup(void)
2888{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002889 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002890 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002891 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002892 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002893 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002894 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002895 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002896}