blob: ad9eaecf539c4629ed101400d7a8f586b906c3e3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Joe Perchesf3213832012-05-15 14:11:53 +000027#define pr_fmt(fmt) "IPv6: " fmt
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040031#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080045#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090046#include <linux/slab.h>
Wei Wang35732d02017-10-06 12:05:57 -070047#include <linux/jhash.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020048#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
Jiri Benc904af042015-08-20 13:56:31 +020058#include <net/dst_metadata.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070060#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070061#include <net/netlink.h>
Nicolas Dichtel51ebd312012-10-22 03:42:09 +000062#include <net/nexthop.h>
Roopa Prabhu19e42e42015-07-21 10:43:48 +020063#include <net/lwtunnel.h>
Jiri Benc904af042015-08-20 13:56:31 +020064#include <net/ip_tunnels.h>
David Ahernca254492015-10-12 11:47:10 -070065#include <net/l3mdev.h>
David Ahernb8115802015-11-19 12:24:22 -080066#include <trace/events/fib6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Outcome of a neighbour (NUD) reachability check used by router
 * selection.  Negative values are failures of increasing severity;
 * a positive value means the next hop is (probably) reachable.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* unusable, do not consider */
	RT6_NUD_FAIL_PROBE = -2,	/* failed, but worth probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* failed, fall back to round-robin */
	RT6_NUD_SUCCEED = 1		/* neighbour looks reachable */
};
80
/* Forward declarations for the dst_ops callbacks and local helpers
 * that are defined later in this file.
 */
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct rt6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109
#ifdef CONFIG_IPV6_ROUTE_INFO
/* RFC 4191 route-information option handling (defined further below). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
121
/* Per-CPU list of rt6_info entries that live outside the FIB tree
 * ("uncached" routes).  Tracked so that device teardown can find and
 * re-home them (see rt6_uncached_list_flush_dev).  Each list is
 * protected by its own spinlock.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
/* Add @rt to this CPU's uncached list.  The list joined is remembered
 * in rt->rt6i_uncached_list so removal does not need to run on the
 * same CPU.
 */
void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
139
/* Remove @rt from the uncached list it was added to, if any, and drop
 * the per-netns uncached-route counter.  Safe on a route that was
 * never added (empty list_head check).
 */
void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}
152
/* When @dev goes away, walk every CPU's uncached list and detach
 * routes that still reference it: both the inet6_dev and the dst's
 * device are re-pointed at the loopback device so the route object
 * stays usable.  No-op when @dev is the loopback device itself.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				/* take the new ref before releasing the old one */
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
184
David Ahernf8a1b432018-04-17 17:33:21 -0700185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
David S. Millerf894cbf2012-07-02 21:52:24 -0700186 struct sk_buff *skb,
187 const void *daddr)
David S. Miller39232972012-01-26 15:22:32 -0500188{
David S. Millera7563f32012-01-26 16:29:16 -0500189 if (!ipv6_addr_any(p))
David S. Miller39232972012-01-26 15:22:32 -0500190 return (const void *) p;
David S. Millerf894cbf2012-07-02 21:52:24 -0700191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
David S. Miller39232972012-01-26 15:22:32 -0500193 return daddr;
194}
195
David Ahernf8a1b432018-04-17 17:33:21 -0700196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
David S. Millerd3aaeb32011-07-18 00:40:17 -0700200{
David S. Miller39232972012-01-26 15:22:32 -0500201 struct neighbour *n;
202
David Ahernf8a1b432018-04-17 17:33:21 -0700203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500205 if (n)
206 return n;
David Ahernf8a1b432018-04-17 17:33:21 -0700207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500217}
218
Julian Anastasov63fca652017-02-06 23:14:15 +0200219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
David Ahernf8a1b432018-04-17 17:33:21 -0700224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
Julian Anastasov63fca652017-02-06 23:14:15 +0200225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
/* dst_ops template for regular IPv6 routes; copied into each network
 * namespace's ip6_dst_ops at init.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
252
Steffen Klassertebb762f2011-11-23 02:12:51 +0000253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -0800254{
Steffen Klassert618f9bc2011-11-23 02:13:31 +0000255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -0800258}
259
/* Blackhole routes deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
264
/* Blackhole routes deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
269
/* dst_ops for blackhole dsts (e.g. created via ip6_dst_blackhole);
 * PMTU updates and redirects are no-ops.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};
281
/* Metrics used by the reserved route templates below; hop limit is
 * explicitly left at 0 (unset).
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
285
/* Template for the per-netns fib6_null_entry: a FIB-side sentinel that
 * rejects lookups (note: no dst fields are initialized here, unlike
 * ip6_null_entry_template below).
 */
static const struct rt6_info fib6_null_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
294
/* Template for the per-netns ip6_null_entry: default "no route"
 * entry whose dst discards packets with -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
};
310
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the prohibit entry: discards with -EACCES
 * (administratively prohibited).
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_PROHIBIT,
};

/* Template for the blackhole entry: silently discards with -EINVAL. */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_BLACKHOLE,
};

#endif
346
/* Common initialization for a freshly dst_alloc()'d rt6_info: zero all
 * rt6_info-specific fields and set up list heads and default metrics.
 * NOTE(review): the memset assumes 'dst' is the first member of
 * struct rt6_info so that (dst + 1) addresses the remaining fields.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* zero everything after the embedded dst_entry */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
}
356
/* allocate dst with ip6_dst_ops */
/* Allocate a rt6_info through dst_alloc() with an initial reference
 * and apply the common initialization.  Bumps the per-netns allocation
 * counter on success; returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}
372
/* Allocate a rt6_info including its per-CPU route cache.  On per-CPU
 * allocation failure the partially built dst is released immediately
 * and NULL is returned.
 */
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (!rt->rt6i_pcpu) {
			/* no other references yet, so free synchronously */
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
Martin KaFai Laud52d3992015-05-22 20:56:06 -0700390
/* dst_ops.destroy callback: tear down a rt6_info.  Releases metrics,
 * the per-CPU cache, the uncached-list membership, the inet6_dev
 * reference, the exception bucket, shared fib6 metrics, and finally
 * the reference on the parent ('from') route.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct inet6_dev *idev;
	struct dst_metrics *m;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	/* no concurrent readers at destroy time, hence the "1" */
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
	if (bucket) {
		rt->rt6i_exception_bucket = NULL;
		kfree(bucket);
	}

	/* shared fib6 metrics are refcounted; default metrics are static */
	m = rt->fib6_metrics;
	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
		kfree(m);

	rt->from = NULL;
	dst_release(&from->dst);
}
421
/* dst_ops.ifdown callback: when @dev goes down, move the route's
 * inet6_dev reference over to the loopback device so the rt6_info can
 * outlive the device.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
438
Martin KaFai Lau5973fb12015-11-11 11:51:07 -0800439static bool __rt6_check_expired(const struct rt6_info *rt)
440{
441 if (rt->rt6i_flags & RTF_EXPIRES)
442 return time_after(jiffies, rt->dst.expires);
443 else
444 return false;
445}
446
/* Like __rt6_check_expired(), but a route without its own expiry also
 * counts as expired when its parent ('from') route has expired or the
 * dst is no longer in the FORCE_CHK obsolete state.
 */
static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
		       fib6_check_expired(rt->from);
	}
	return false;
}
458
/* Pick one nexthop among @match and its ECMP siblings using the flow
 * hash against each nexthop's upper bound (hash-threshold selection).
 * Falls back to @match when no eligible sibling is found.
 */
static struct rt6_info *rt6_multipath_select(const struct net *net,
					     struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     const struct sk_buff *skb,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
				 rt6i_siblings) {
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
		if (fl6->mp_hash > nh_upper_bound)
			continue;
		/* hash is within this sibling's range; use it only if it
		 * scores acceptably for this oif/strictness
		 */
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}
491
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492/*
Wei Wang66f5d6c2017-10-06 12:06:10 -0700493 * Route lookup. rcu_read_lock() should be held.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 */
495
/* Walk the leaf's route list and pick the entry matching the requested
 * output interface (@oif) or source address (@saddr).  Dead nexthops
 * are skipped; a loopback route bound to another interface may serve
 * as a fallback ('local') unless RT6_LOOKUP_F_IFACE demands an exact
 * interface match, in which case the netns null entry is returned.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* nothing to match against: first live route wins */
	if (!oif && ipv6_addr_any(saddr) &&
	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
		const struct net_device *dev = sprt->fib6_nh.nh_dev;

		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.fib6_null_entry;
	}

	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
}
546
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred-work context for sending a neighbour solicitation probe. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/* Workqueue callback: send the NS to the solicited-node multicast
 * address and drop the device reference taken by rt6_probe().
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

/* Schedule a reachability probe for @rt's gateway if its neighbour
 * entry is missing or stale.  Probing is rate-limited via the neigh
 * 'updated' timestamp and rtr_probe_interval; the NS itself is sent
 * from process context via rt6_probe_deferred().
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	const struct in6_addr *nh_gw;
	struct neighbour *neigh;
	struct net_device *dev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	nh_gw = &rt->fib6_nh.nh_gw;
	dev = rt->fib6_nh.nh_dev;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* re-check state under the lock before arming a probe */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet: probe unconditionally */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		dev_hold(dev);	/* released in rt6_probe_deferred() */
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Without router-preference support, probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
623
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800625 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700627static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628{
David Ahern5e670d82018-04-17 17:33:14 -0700629 const struct net_device *dev = rt->fib6_nh.nh_dev;
630
David S. Miller161980f2007-04-06 11:42:27 -0700631 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800632 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700633 if ((dev->flags & IFF_LOOPBACK) &&
634 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
635 return 1;
636 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637}
638
/* Evaluate the NUD state of @rt's gateway neighbour for router
 * selection.  Routes without a gateway (or with RTF_NONEXTHOP) always
 * succeed.  Result semantics per enum rt6_nud_state.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
					  &rt->fib6_nh.nh_gw);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		/* with router preference: accept anything not FAILED,
		 * otherwise ask for a probe
		 */
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800671static int rt6_score_route(struct rt6_info *rt, int oif,
672 int strict)
673{
Paul Marksa5a81f02012-12-03 10:26:54 +0000674 int m;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900675
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700676 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700677 if (!m && (strict & RT6_LOOKUP_F_IFACE))
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200678 return RT6_NUD_FAIL_HARD;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800679#ifdef CONFIG_IPV6_ROUTER_PREF
680 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
681#endif
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200682 if (strict & RT6_LOOKUP_F_REACHABLE) {
683 int n = rt6_check_neigh(rt);
684 if (n < 0)
685 return n;
686 }
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800687 return m;
688}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
David S. Millerf11e6652007-03-24 20:36:25 -0700690static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200691 int *mpri, struct rt6_info *match,
692 bool *do_rr)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800693{
David S. Millerf11e6652007-03-24 20:36:25 -0700694 int m;
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200695 bool match_do_rr = false;
Andy Gospodarek35103d12015-08-13 10:39:01 -0400696 struct inet6_dev *idev = rt->rt6i_idev;
Andy Gospodarek35103d12015-08-13 10:39:01 -0400697
David Ahern5e670d82018-04-17 17:33:14 -0700698 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel8067bb82018-01-07 12:45:09 +0200699 goto out;
700
Ido Schimmel14c52062018-01-07 12:45:07 +0200701 if (idev->cnf.ignore_routes_with_linkdown &&
David Ahern5e670d82018-04-17 17:33:14 -0700702 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
David Ahernd5d32e42016-10-24 12:27:23 -0700703 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
Andy Gospodarek35103d12015-08-13 10:39:01 -0400704 goto out;
David S. Millerf11e6652007-03-24 20:36:25 -0700705
David Ahern14895682018-04-17 17:33:17 -0700706 if (fib6_check_expired(rt))
David S. Millerf11e6652007-03-24 20:36:25 -0700707 goto out;
708
709 m = rt6_score_route(rt, oif, strict);
Jiri Benc7e980562013-12-11 13:48:20 +0100710 if (m == RT6_NUD_FAIL_DO_RR) {
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200711 match_do_rr = true;
712 m = 0; /* lowest valid score */
Jiri Benc7e980562013-12-11 13:48:20 +0100713 } else if (m == RT6_NUD_FAIL_HARD) {
David S. Millerf11e6652007-03-24 20:36:25 -0700714 goto out;
David S. Millerf11e6652007-03-24 20:36:25 -0700715 }
716
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200717 if (strict & RT6_LOOKUP_F_REACHABLE)
718 rt6_probe(rt);
719
Jiri Benc7e980562013-12-11 13:48:20 +0100720 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200721 if (m > *mpri) {
722 *do_rr = match_do_rr;
723 *mpri = m;
724 match = rt;
725 }
David S. Millerf11e6652007-03-24 20:36:25 -0700726out:
727 return match;
728}
729
/* Scan the routes of @fn with the given @metric for the best match,
 * starting the round-robin walk at @rr_head and wrapping around via
 * @leaf.  If nothing at @metric matched, continue into the rest of the
 * list (@cont) where entries carry a different metric.
 * Caller must hold rcu_read_lock (rt6_next is rcu_dereference'd).
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *leaf,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* first half: from the round-robin head to the end of this metric */
	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* second half: from the leaf back up to the round-robin head */
	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* fall back to the remaining entries with other metrics */
	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800768
Wei Wang8d1040e2017-10-06 12:06:08 -0700769static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
770 int oif, int strict)
David S. Millerf11e6652007-03-24 20:36:25 -0700771{
Wei Wang66f5d6c2017-10-06 12:06:10 -0700772 struct rt6_info *leaf = rcu_dereference(fn->leaf);
David S. Millerf11e6652007-03-24 20:36:25 -0700773 struct rt6_info *match, *rt0;
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200774 bool do_rr = false;
Wei Wang17ecf592017-10-06 12:06:09 -0700775 int key_plen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776
David Ahern421842e2018-04-17 17:33:18 -0700777 if (!leaf || leaf == net->ipv6.fib6_null_entry)
778 return net->ipv6.fib6_null_entry;
Wei Wang8d1040e2017-10-06 12:06:08 -0700779
Wei Wang66f5d6c2017-10-06 12:06:10 -0700780 rt0 = rcu_dereference(fn->rr_ptr);
David S. Millerf11e6652007-03-24 20:36:25 -0700781 if (!rt0)
Wei Wang66f5d6c2017-10-06 12:06:10 -0700782 rt0 = leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783
Wei Wang17ecf592017-10-06 12:06:09 -0700784 /* Double check to make sure fn is not an intermediate node
785 * and fn->leaf does not points to its child's leaf
786 * (This might happen if all routes under fn are deleted from
787 * the tree and fib6_repair_tree() is called on the node.)
788 */
789 key_plen = rt0->rt6i_dst.plen;
790#ifdef CONFIG_IPV6_SUBTREES
791 if (rt0->rt6i_src.plen)
792 key_plen = rt0->rt6i_src.plen;
793#endif
794 if (fn->fn_bit != key_plen)
David Ahern421842e2018-04-17 17:33:18 -0700795 return net->ipv6.fib6_null_entry;
Wei Wang17ecf592017-10-06 12:06:09 -0700796
Wei Wang8d1040e2017-10-06 12:06:08 -0700797 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200798 &do_rr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
Hannes Frederic Sowaafc154e2013-07-11 12:43:42 +0200800 if (do_rr) {
David Miller071fb372017-11-28 15:40:15 -0500801 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
David S. Millerf11e6652007-03-24 20:36:25 -0700802
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800803 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700804 if (!next || next->rt6i_metric != rt0->rt6i_metric)
Wei Wang8d1040e2017-10-06 12:06:08 -0700805 next = leaf;
David S. Millerf11e6652007-03-24 20:36:25 -0700806
Wei Wang66f5d6c2017-10-06 12:06:10 -0700807 if (next != rt0) {
808 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
809 /* make sure next is not being deleted from the tree */
810 if (next->rt6i_node)
811 rcu_assign_pointer(fn->rr_ptr, next);
812 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
813 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 }
815
David Ahern421842e2018-04-17 17:33:18 -0700816 return match ? match : net->ipv6.fib6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817}
818
Martin KaFai Lau8b9df262015-05-22 20:55:59 -0700819static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
820{
821 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
822}
823
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800824#ifdef CONFIG_IPV6_ROUTE_INFO
825int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000826 const struct in6_addr *gwaddr)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800827{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900828 struct net *net = dev_net(dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800829 struct route_info *rinfo = (struct route_info *) opt;
830 struct in6_addr prefix_buf, *prefix;
831 unsigned int pref;
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900832 unsigned long lifetime;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800833 struct rt6_info *rt;
834
835 if (len < sizeof(struct route_info)) {
836 return -EINVAL;
837 }
838
839 /* Sanity check for prefix_len and length */
840 if (rinfo->length > 3) {
841 return -EINVAL;
842 } else if (rinfo->prefix_len > 128) {
843 return -EINVAL;
844 } else if (rinfo->prefix_len > 64) {
845 if (rinfo->length < 2) {
846 return -EINVAL;
847 }
848 } else if (rinfo->prefix_len > 0) {
849 if (rinfo->length < 1) {
850 return -EINVAL;
851 }
852 }
853
854 pref = rinfo->route_pref;
855 if (pref == ICMPV6_ROUTER_PREF_INVALID)
Jens Rosenboom3933fc92009-09-10 06:25:11 +0000856 return -EINVAL;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800857
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900858 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800859
860 if (rinfo->length == 3)
861 prefix = (struct in6_addr *)rinfo->prefix;
862 else {
863 /* this function is safe */
864 ipv6_addr_prefix(&prefix_buf,
865 (struct in6_addr *)rinfo->prefix,
866 rinfo->prefix_len);
867 prefix = &prefix_buf;
868 }
869
Duan Jiongf104a562013-11-08 09:56:53 +0800870 if (rinfo->prefix_len == 0)
David Ahernafb1d4b52018-04-17 17:33:11 -0700871 rt = rt6_get_dflt_router(net, gwaddr, dev);
Duan Jiongf104a562013-11-08 09:56:53 +0800872 else
873 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
David Ahern830218c2016-10-24 10:52:35 -0700874 gwaddr, dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800875
876 if (rt && !lifetime) {
David Ahernafb1d4b52018-04-17 17:33:11 -0700877 ip6_del_rt(net, rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800878 rt = NULL;
879 }
880
881 if (!rt && lifetime)
David Ahern830218c2016-10-24 10:52:35 -0700882 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
883 dev, pref);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800884 else if (rt)
885 rt->rt6i_flags = RTF_ROUTEINFO |
886 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
887
888 if (rt) {
Gao feng1716a962012-04-06 00:13:10 +0000889 if (!addrconf_finite_timeout(lifetime))
David Ahern14895682018-04-17 17:33:17 -0700890 fib6_clean_expires(rt);
Gao feng1716a962012-04-06 00:13:10 +0000891 else
David Ahern14895682018-04-17 17:33:17 -0700892 fib6_set_expires(rt, jiffies + HZ * lifetime);
Gao feng1716a962012-04-06 00:13:10 +0000893
Amerigo Wang94e187c2012-10-29 00:13:19 +0000894 ip6_rt_put(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800895 }
896 return 0;
897}
898#endif
899
David Ahernae90d862018-04-17 17:33:12 -0700900/*
901 * Misc support functions
902 */
903
904/* called with rcu_lock held */
905static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
906{
David Ahern5e670d82018-04-17 17:33:14 -0700907 struct net_device *dev = rt->fib6_nh.nh_dev;
David Ahernae90d862018-04-17 17:33:12 -0700908
909 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
910 /* for copies of local routes, dst->dev needs to be the
911 * device if it is a master device, the master device if
912 * device is enslaved, and the loopback as the default
913 */
914 if (netif_is_l3_slave(dev) &&
915 !rt6_need_strict(&rt->rt6i_dst.addr))
916 dev = l3mdev_master_dev_rcu(dev);
917 else if (!netif_is_l3_master(dev))
918 dev = dev_net(dev)->loopback_dev;
919 /* last case is netif_is_l3_master(dev) is true in which
920 * case we want dev returned to be dev
921 */
922 }
923
924 return dev;
925}
926
David Ahern6edb3c92018-04-17 17:33:15 -0700927static const int fib6_prop[RTN_MAX + 1] = {
928 [RTN_UNSPEC] = 0,
929 [RTN_UNICAST] = 0,
930 [RTN_LOCAL] = 0,
931 [RTN_BROADCAST] = 0,
932 [RTN_ANYCAST] = 0,
933 [RTN_MULTICAST] = 0,
934 [RTN_BLACKHOLE] = -EINVAL,
935 [RTN_UNREACHABLE] = -EHOSTUNREACH,
936 [RTN_PROHIBIT] = -EACCES,
937 [RTN_THROW] = -EAGAIN,
938 [RTN_NAT] = -EINVAL,
939 [RTN_XRESOLVE] = -EINVAL,
940};
941
/* Look up the dst error for a route type.
 * NOTE(review): no bounds check — assumes fib6_type <= RTN_MAX, which
 * callers are expected to guarantee; confirm at call sites.
 */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}
946
David Ahern3b6761d2018-04-17 17:33:20 -0700947static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
948{
949 unsigned short flags = 0;
950
951 if (rt->dst_nocount)
952 flags |= DST_NOCOUNT;
953 if (rt->dst_nopolicy)
954 flags |= DST_NOPOLICY;
955 if (rt->dst_host)
956 flags |= DST_HOST;
957
958 return flags;
959}
960
David Ahern6edb3c92018-04-17 17:33:15 -0700961static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
962{
963 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
964
965 switch (ort->fib6_type) {
966 case RTN_BLACKHOLE:
967 rt->dst.output = dst_discard_out;
968 rt->dst.input = dst_discard;
969 break;
970 case RTN_PROHIBIT:
971 rt->dst.output = ip6_pkt_prohibit_out;
972 rt->dst.input = ip6_pkt_prohibit;
973 break;
974 case RTN_THROW:
975 case RTN_UNREACHABLE:
976 default:
977 rt->dst.output = ip6_pkt_discard_out;
978 rt->dst.input = ip6_pkt_discard;
979 break;
980 }
981}
982
983static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
984{
David Ahern3b6761d2018-04-17 17:33:20 -0700985 rt->dst.flags |= fib6_info_dst_flags(ort);
986
David Ahern6edb3c92018-04-17 17:33:15 -0700987 if (ort->rt6i_flags & RTF_REJECT) {
988 ip6_rt_init_dst_reject(rt, ort);
989 return;
990 }
991
992 rt->dst.error = 0;
993 rt->dst.output = ip6_output;
994
995 if (ort->fib6_type == RTN_LOCAL) {
David Ahern6edb3c92018-04-17 17:33:15 -0700996 rt->dst.input = ip6_input;
997 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
998 rt->dst.input = ip6_mc_input;
999 } else {
1000 rt->dst.input = ip6_forward;
1001 }
1002
1003 if (ort->fib6_nh.nh_lwtstate) {
1004 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
1005 lwtunnel_set_redirect(&rt->dst);
1006 }
1007
1008 rt->dst.lastuse = jiffies;
1009}
1010
David Ahernae90d862018-04-17 17:33:12 -07001011static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1012{
1013 BUG_ON(from->from);
1014
1015 rt->rt6i_flags &= ~RTF_EXPIRES;
David Ahern23fb93a2018-04-17 17:33:23 -07001016 if (dst_hold_safe(&from->dst))
1017 rt->from = from;
David Ahernd4ead6b2018-04-17 17:33:16 -07001018 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
1019 if (from->fib6_metrics != &dst_default_metrics) {
1020 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
1021 refcount_inc(&from->fib6_metrics->refcnt);
1022 }
David Ahernae90d862018-04-17 17:33:12 -07001023}
1024
/* Copy routing state from fib entry @ort into derived route @rt,
 * taking the references the copy needs (idev hold, lwtstate get,
 * parent/metrics via rt6_set_from()).
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	ip6_rt_init_dst(rt, ort);

	rt->rt6i_dst = ort->rt6i_dst;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
}
1044
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001045static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1046 struct in6_addr *saddr)
1047{
Wei Wang66f5d6c2017-10-06 12:06:10 -07001048 struct fib6_node *pn, *sn;
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001049 while (1) {
1050 if (fn->fn_flags & RTN_TL_ROOT)
1051 return NULL;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001052 pn = rcu_dereference(fn->parent);
1053 sn = FIB6_SUBTREE(pn);
1054 if (sn && sn != fn)
1055 fn = fib6_lookup(sn, NULL, saddr);
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001056 else
1057 fn = pn;
1058 if (fn->fn_flags & RTN_RTINFO)
1059 return fn;
1060 }
1061}
Thomas Grafc71099a2006-08-04 23:20:06 -07001062
Wei Wangd3843fe2017-10-06 12:06:06 -07001063static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1064 bool null_fallback)
1065{
1066 struct rt6_info *rt = *prt;
1067
1068 if (dst_hold_safe(&rt->dst))
1069 return true;
1070 if (null_fallback) {
1071 rt = net->ipv6.ip6_null_entry;
1072 dst_hold(&rt->dst);
1073 } else {
1074 rt = NULL;
1075 }
1076 *prt = rt;
1077 return false;
1078}
1079
David Aherndec9b0e2018-04-17 17:33:19 -07001080/* called with rcu_lock held */
1081static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
1082{
David Ahern3b6761d2018-04-17 17:33:20 -07001083 unsigned short flags = fib6_info_dst_flags(rt);
David Aherndec9b0e2018-04-17 17:33:19 -07001084 struct net_device *dev = rt->fib6_nh.nh_dev;
1085 struct rt6_info *nrt;
1086
David Ahern3b6761d2018-04-17 17:33:20 -07001087 nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
David Aherndec9b0e2018-04-17 17:33:19 -07001088 if (nrt)
1089 ip6_rt_copy_init(nrt, rt);
1090
1091 return nrt;
1092}
1093
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001094static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1095 struct fib6_table *table,
David Ahernb75cc8f2018-03-02 08:32:17 -08001096 struct flowi6 *fl6,
1097 const struct sk_buff *skb,
1098 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099{
David Ahern23fb93a2018-04-17 17:33:23 -07001100 struct rt6_info *f6i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 struct fib6_node *fn;
David Ahern23fb93a2018-04-17 17:33:23 -07001102 struct rt6_info *rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103
David Ahernb6cdbc82018-03-29 17:44:57 -07001104 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1105 flags &= ~RT6_LOOKUP_F_IFACE;
1106
Wei Wang66f5d6c2017-10-06 12:06:10 -07001107 rcu_read_lock();
David S. Miller4c9483b2011-03-12 16:22:43 -05001108 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -07001109restart:
David Ahern23fb93a2018-04-17 17:33:23 -07001110 f6i = rcu_dereference(fn->leaf);
1111 if (!f6i) {
1112 f6i = net->ipv6.fib6_null_entry;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001113 } else {
David Ahern23fb93a2018-04-17 17:33:23 -07001114 f6i = rt6_device_match(net, f6i, &fl6->saddr,
Wei Wang66f5d6c2017-10-06 12:06:10 -07001115 fl6->flowi6_oif, flags);
David Ahern23fb93a2018-04-17 17:33:23 -07001116 if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
1117 f6i = rt6_multipath_select(net, f6i, fl6,
1118 fl6->flowi6_oif, skb, flags);
Wei Wang66f5d6c2017-10-06 12:06:10 -07001119 }
David Ahern23fb93a2018-04-17 17:33:23 -07001120 if (f6i == net->ipv6.fib6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001121 fn = fib6_backtrack(fn, &fl6->saddr);
1122 if (fn)
1123 goto restart;
1124 }
David Ahern23fb93a2018-04-17 17:33:23 -07001125
Wei Wang2b760fc2017-10-06 12:06:03 -07001126 /* Search through exception table */
David Ahern23fb93a2018-04-17 17:33:23 -07001127 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1128 if (rt) {
David Aherndec9b0e2018-04-17 17:33:19 -07001129 if (ip6_hold_safe(net, &rt, true))
1130 dst_use_noref(&rt->dst, jiffies);
David Ahern23fb93a2018-04-17 17:33:23 -07001131 } else if (f6i == net->ipv6.fib6_null_entry) {
David Aherndec9b0e2018-04-17 17:33:19 -07001132 rt = net->ipv6.ip6_null_entry;
1133 dst_hold(&rt->dst);
David Ahern23fb93a2018-04-17 17:33:23 -07001134 } else {
1135 rt = ip6_create_rt_rcu(f6i);
1136 if (!rt) {
1137 rt = net->ipv6.ip6_null_entry;
1138 dst_hold(&rt->dst);
1139 }
David Aherndec9b0e2018-04-17 17:33:19 -07001140 }
Wei Wangd3843fe2017-10-06 12:06:06 -07001141
Wei Wang66f5d6c2017-10-06 12:06:10 -07001142 rcu_read_unlock();
David Ahernb8115802015-11-19 12:24:22 -08001143
Paolo Abenib65f1642017-10-19 09:31:43 +02001144 trace_fib6_table_lookup(net, rt, table, fl6);
David Ahernb8115802015-11-19 12:24:22 -08001145
Thomas Grafc71099a2006-08-04 23:20:06 -07001146 return rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001147}
1148
Ian Morris67ba4152014-08-24 21:53:10 +01001149struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
David Ahernb75cc8f2018-03-02 08:32:17 -08001150 const struct sk_buff *skb, int flags)
Florian Westphalea6e5742011-09-05 16:05:44 +02001151{
David Ahernb75cc8f2018-03-02 08:32:17 -08001152 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
Florian Westphalea6e5742011-09-05 16:05:44 +02001153}
1154EXPORT_SYMBOL_GPL(ip6_route_lookup);
1155
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001156struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
David Ahernb75cc8f2018-03-02 08:32:17 -08001157 const struct in6_addr *saddr, int oif,
1158 const struct sk_buff *skb, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -07001159{
David S. Miller4c9483b2011-03-12 16:22:43 -05001160 struct flowi6 fl6 = {
1161 .flowi6_oif = oif,
1162 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -07001163 };
1164 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001165 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001166
Thomas Grafadaa70b2006-10-13 15:01:03 -07001167 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -05001168 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -07001169 flags |= RT6_LOOKUP_F_HAS_SADDR;
1170 }
1171
David Ahernb75cc8f2018-03-02 08:32:17 -08001172 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -07001173 if (dst->error == 0)
1174 return (struct rt6_info *) dst;
1175
1176 dst_release(dst);
1177
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 return NULL;
1179}
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +09001180EXPORT_SYMBOL(rt6_lookup);
1181
Thomas Grafc71099a2006-08-04 23:20:06 -07001182/* ip6_ins_rt is called with FREE table->tb6_lock.
Wei Wang1cfb71e2017-06-17 10:42:33 -07001183 * It takes new route entry, the addition fails by any reason the
1184 * route is released.
1185 * Caller must hold dst before calling it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 */
1187
Michal Kubečeke5fd3872014-03-27 13:04:08 +01001188static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
David Ahern333c4302017-05-21 10:12:04 -06001189 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190{
1191 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001192 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193
Thomas Grafc71099a2006-08-04 23:20:06 -07001194 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001195 spin_lock_bh(&table->tb6_lock);
David Ahernd4ead6b2018-04-17 17:33:16 -07001196 err = fib6_add(&table->tb6_root, rt, info, extack);
Wei Wang66f5d6c2017-10-06 12:06:10 -07001197 spin_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
1199 return err;
1200}
1201
David Ahernafb1d4b52018-04-17 17:33:11 -07001202int ip6_ins_rt(struct net *net, struct rt6_info *rt)
Thomas Graf40e22e82006-08-22 00:00:45 -07001203{
David Ahernafb1d4b52018-04-17 17:33:11 -07001204 struct nl_info info = { .nl_net = net, };
Florian Westphale715b6d2015-01-05 23:57:44 +01001205
Wei Wang1cfb71e2017-06-17 10:42:33 -07001206 /* Hold dst to account for the reference from the fib6 tree */
1207 dst_hold(&rt->dst);
David Ahernd4ead6b2018-04-17 17:33:16 -07001208 return __ip6_ins_rt(rt, &info, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -07001209}
1210
Martin KaFai Lau8b9df262015-05-22 20:55:59 -07001211static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1212 const struct in6_addr *daddr,
1213 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214{
David Ahern4832c302017-08-17 12:17:20 -07001215 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 struct rt6_info *rt;
1217
1218 /*
1219 * Clone the route.
1220 */
1221
David Ahern4832c302017-08-17 12:17:20 -07001222 rcu_read_lock();
1223 dev = ip6_rt_get_dev_rcu(ort);
1224 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1225 rcu_read_unlock();
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001226 if (!rt)
1227 return NULL;
1228
1229 ip6_rt_copy_init(rt, ort);
1230 rt->rt6i_flags |= RTF_CACHE;
1231 rt->rt6i_metric = 0;
1232 rt->dst.flags |= DST_HOST;
1233 rt->rt6i_dst.addr = *daddr;
1234 rt->rt6i_dst.plen = 128;
1235
1236 if (!rt6_is_gw_or_nonexthop(ort)) {
1237 if (ort->rt6i_dst.plen != 128 &&
1238 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1239 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240#ifdef CONFIG_IPV6_SUBTREES
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001241 if (rt->rt6i_src.plen && saddr) {
1242 rt->rt6i_src.addr = *saddr;
1243 rt->rt6i_src.plen = 128;
Martin KaFai Lau8b9df262015-05-22 20:55:59 -07001244 }
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001245#endif
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -08001246 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -08001248 return rt;
1249}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001251static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1252{
David Ahern3b6761d2018-04-17 17:33:20 -07001253 unsigned short flags = fib6_info_dst_flags(rt);
David Ahern4832c302017-08-17 12:17:20 -07001254 struct net_device *dev;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001255 struct rt6_info *pcpu_rt;
1256
David Ahern4832c302017-08-17 12:17:20 -07001257 rcu_read_lock();
1258 dev = ip6_rt_get_dev_rcu(rt);
David Ahern3b6761d2018-04-17 17:33:20 -07001259 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
David Ahern4832c302017-08-17 12:17:20 -07001260 rcu_read_unlock();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001261 if (!pcpu_rt)
1262 return NULL;
1263 ip6_rt_copy_init(pcpu_rt, rt);
1264 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1265 pcpu_rt->rt6i_flags |= RTF_PCPU;
1266 return pcpu_rt;
1267}
1268
Wei Wang66f5d6c2017-10-06 12:06:10 -07001269/* It should be called with rcu_read_lock() acquired */
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001270static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1271{
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001272 struct rt6_info *pcpu_rt, **p;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001273
1274 p = this_cpu_ptr(rt->rt6i_pcpu);
1275 pcpu_rt = *p;
1276
David Ahernd4ead6b2018-04-17 17:33:16 -07001277 if (pcpu_rt)
1278 ip6_hold_safe(NULL, &pcpu_rt, false);
Wei Wangd3843fe2017-10-06 12:06:06 -07001279
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001280 return pcpu_rt;
1281}
1282
David Ahernafb1d4b52018-04-17 17:33:11 -07001283static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1284 struct rt6_info *rt)
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001285{
1286 struct rt6_info *pcpu_rt, *prev, **p;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001287
1288 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1289 if (!pcpu_rt) {
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001290 dst_hold(&net->ipv6.ip6_null_entry->dst);
1291 return net->ipv6.ip6_null_entry;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001292 }
1293
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001294 dst_hold(&pcpu_rt->dst);
Wei Wanga94b9362017-10-06 12:06:04 -07001295 p = this_cpu_ptr(rt->rt6i_pcpu);
1296 prev = cmpxchg(p, NULL, pcpu_rt);
Eric Dumazet951f7882017-10-08 21:07:18 -07001297 BUG_ON(prev);
Wei Wanga94b9362017-10-06 12:06:04 -07001298
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001299 return pcpu_rt;
1300}
1301
Wei Wang35732d02017-10-06 12:05:57 -07001302/* exception hash table implementation
1303 */
1304static DEFINE_SPINLOCK(rt6_exception_lock);
1305
/* Remove rt6_ex from hash table and free the memory
 * (entry freed after a grace period via kfree_rcu).
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				struct rt6_exception *rt6_ex)
{
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	net = dev_net(rt6_ex->rt6i->dst.dev);
	/* detach the cached route from the tree before dropping its ref */
	rt6_ex->rt6i->rt6i_node = NULL;
	hlist_del_rcu(&rt6_ex->hlist);
	rt6_release(rt6_ex->rt6i);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
	net->ipv6.rt6_stats->fib_rt_cache--;
}
1326
1327/* Remove oldest rt6_ex in bucket and free the memory
1328 * Caller must hold rt6_exception_lock
1329 */
1330static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1331{
1332 struct rt6_exception *rt6_ex, *oldest = NULL;
1333
1334 if (!bucket)
1335 return;
1336
1337 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1338 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1339 oldest = rt6_ex;
1340 }
1341 rt6_remove_exception(bucket, oldest);
1342}
1343
1344static u32 rt6_exception_hash(const struct in6_addr *dst,
1345 const struct in6_addr *src)
1346{
1347 static u32 seed __read_mostly;
1348 u32 val;
1349
1350 net_get_random_once(&seed, sizeof(seed));
1351 val = jhash(dst, sizeof(*dst), seed);
1352
1353#ifdef CONFIG_IPV6_SUBTREES
1354 if (src)
1355 val = jhash(src, sizeof(*src), val);
1356#endif
1357 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1358}
1359
/* Helper function to find the cached rt in the hash table
 * and update *bucket to point to the bucket for this
 * (daddr, saddr) pair.  Returns the matching entry or NULL.
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		/* with subtrees the source address must match too */
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
1392
/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 *
 * RCU counterpart of __rt6_find_exception_spinlock(); same contract,
 * including the side effect of advancing *bucket to the matching chain.
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		/* saddr participates in the match only for subtree routes */
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}
1427
David Ahernd4ead6b2018-04-17 17:33:16 -07001428static unsigned int fib6_mtu(const struct rt6_info *rt)
1429{
1430 unsigned int mtu;
1431
1432 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1433 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1434
1435 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1436}
1437
/* Insert the RTF_CACHE route @nrt into the exception table hung off its
 * origin fib entry @ort, allocating the bucket array on first use.
 *
 * Returns 0 on success; -EINVAL if @ort's exceptions were already flushed
 * or @nrt carries an MTU metric that is not below @ort's effective MTU
 * (an unset/zero metric passes); -ENOMEM on allocation failure.
 *
 * Takes rt6_exception_lock.  On success it also bumps the fib6 node's
 * serial number (under tb6_lock) so cached dsts are revalidated, and
 * kicks the fib6 gc timer.
 */
static int rt6_insert_exception(struct rt6_info *nrt,
				struct rt6_info *ort)
{
	struct net *net = dev_net(nrt->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	spin_lock_bh(&rt6_exception_lock);

	/* rt6_flush_exceptions() set this; do not recreate the buckets */
	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->rt6i_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
		err = -EINVAL;
		goto out;
	}

	/* Replace an existing entry for the same (daddr, saddr) pair;
	 * this call also advances 'bucket' to the target chain.
	 */
	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	/* The table holds a reference on nrt for as long as it is linked */
	atomic_inc(&nrt->rt6i_ref);
	nrt->rt6i_node = ort->rt6i_node;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	/* Keep the chain bounded: evict the LRU entry when too deep */
	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&ort->rt6i_table->tb6_lock);
		fib6_update_sernum(net, ort);
		spin_unlock_bh(&ort->rt6i_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}
1524
/* Remove and free every exception entry attached to @rt.
 *
 * Sets exception_bucket_flushed under rt6_exception_lock so that a
 * concurrent rt6_insert_exception() cannot recreate entries afterwards.
 * Only the chained entries are freed here; the bucket array itself is
 * left in place.
 */
void rt6_flush_exceptions(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() to recreate the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		/* _safe variant: rt6_remove_exception() unlinks entries */
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}
1551
1552/* Find cached rt in the hash table inside passed in rt
1553 * Caller has to hold rcu_read_lock()
1554 */
1555static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1556 struct in6_addr *daddr,
1557 struct in6_addr *saddr)
1558{
1559 struct rt6_exception_bucket *bucket;
1560 struct in6_addr *src_key = NULL;
1561 struct rt6_exception *rt6_ex;
1562 struct rt6_info *res = NULL;
1563
1564 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1565
1566#ifdef CONFIG_IPV6_SUBTREES
1567 /* rt6i_src.plen != 0 indicates rt is in subtree
1568 * and exception table is indexed by a hash of
1569 * both rt6i_dst and rt6i_src.
1570 * Otherwise, the exception table is indexed by
1571 * a hash of only rt6i_dst.
1572 */
1573 if (rt->rt6i_src.plen)
1574 src_key = saddr;
1575#endif
1576 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1577
1578 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1579 res = rt6_ex->rt6i;
1580
1581 return res;
1582}
1583
/* Remove the passed in cached rt from the hash table that contains it
 *
 * @rt must be an RTF_CACHE clone with a valid ->from backpointer; the
 * exception table lives on the origin route.  Returns 0 on success,
 * -EINVAL for a non-cache route, -ENOENT when no matching entry exists.
 * Takes rt6_exception_lock.
 */
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err;

	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	/* Lock-free early out when the origin has no exception table */
	if (!rcu_access_pointer(from->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->rt6i_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}
1626
/* Find rt6_ex which contains the passed in rt cache and
 * refresh its stamp
 *
 * The stamp is the LRU timestamp consulted by
 * rt6_exception_remove_oldest().  Read-side only: takes rcu_read_lock,
 * never rt6_exception_lock.  Silently does nothing for non-cache routes
 * or when no matching entry exists.
 */
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;

	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return;

	rcu_read_lock();
	bucket = rcu_dereference(from->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->rt6i_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket,
					  &rt->rt6i_dst.addr,
					  src_key);
	if (rt6_ex)
		rt6_ex->stamp = jiffies;

	rcu_read_unlock();
}
1662
Wei Wang60006a42017-10-06 12:05:58 -07001663static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1664{
1665 struct rt6_exception_bucket *bucket;
1666 struct rt6_exception *rt6_ex;
1667 int i;
1668
1669 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1670 lockdep_is_held(&rt6_exception_lock));
1671
1672 if (bucket) {
1673 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1674 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1675 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1676 }
1677 bucket++;
1678 }
1679 }
1680}
1681
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001682static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1683 struct rt6_info *rt, int mtu)
1684{
1685 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1686 * lowest MTU in the path: always allow updating the route PMTU to
1687 * reflect PMTU decreases.
1688 *
1689 * If the new MTU is higher, and the route PMTU is equal to the local
1690 * MTU, this means the old MTU is the lowest in the path, so allow
1691 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1692 * handle this.
1693 */
1694
1695 if (dst_mtu(&rt->dst) >= mtu)
1696 return true;
1697
1698 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1699 return true;
1700
1701 return false;
1702}
1703
/* Propagate a device MTU change to the exception routes hung off @rt,
 * but only where rt6_mtu_change_route_allowed() says the cached PMTU
 * may move.  Caller must hold rt6_exception_lock.
 */
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
				       struct rt6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (!bucket)
		return;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			struct rt6_info *entry = rt6_ex->rt6i;

			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
			 * route), the metrics of its rt->from have already
			 * been updated.
			 */
			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
			    rt6_mtu_change_route_allowed(idev, entry, mtu))
				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
		}
		bucket++;
	}
}
1732
/* An exception entry is "via a gateway" only when both flags are set */
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Drop every exception route of @rt whose gateway equals @gateway.
 * Used when a router stops being usable for a destination (e.g. an
 * ICMPv6 redirect back to an on-link host).  Takes rt6_exception_lock.
 */
static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Lock-free early out when @rt has no exception table at all */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			/* _safe variant: matching entries are unlinked */
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}
1769
/* Garbage-collect a single exception entry, removing it when it is
 * aged out, expired, or routed via a gateway that no longer advertises
 * itself as a router.  Entries that survive bump gc_args->more so the
 * gc timer keeps running.
 * Caller must hold rt6_exception_lock; the neighbour lookup below is
 * the _noref variant, so the caller must also be in a (BH) RCU section
 * - see rt6_age_exceptions().
 */
static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* we are pruning and obsoleting aged-out and non gateway exceptions
	 * even if others have still references to them, so that on next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
	 * expired, independently from their aging, as per RFC 8201 section 4
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
			RT6_TRACE("aging clone %p\n", rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (time_after(jiffies, rt->dst.expires)) {
		RT6_TRACE("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
		if (neigh)
			neigh_flags = neigh->flags;

		/* Gateway no longer known as a router (or no neighbour
		 * entry at all): the cached path is stale.
		 */
		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}

	gc_args->more++;
}
1813
/* Walk all exception entries of @rt and garbage-collect them via
 * rt6_age_examine_exception().  Called from the fib6 gc path.
 *
 * rcu_read_lock_bh() is taken in addition to rt6_exception_lock because
 * the per-entry examination does a _noref neighbour lookup that relies
 * on BH-RCU protection.
 */
void rt6_age_exceptions(struct rt6_info *rt,
			struct fib6_gc_args *gc_args,
			unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Lock-free early out when @rt has no exception table at all */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	rcu_read_lock_bh();
	spin_lock(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			/* _safe variant: examination may unlink entries */
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock(&rt6_exception_lock);
	rcu_read_unlock_bh();
}
1844
/* Core policy-routing lookup in @table.
 *
 * Returns a dst-held rt6_info, in order of preference:
 *   1. ip6_null_entry when nothing matches,
 *   2. a cached RTF_CACHE exception route for (daddr, saddr),
 *   3. an uncached clone when FLOWI_FLAG_KNOWN_NH is set on a
 *      non-gateway route (the skb's daddr may differ from fl6->daddr),
 *   4. otherwise a per-cpu copy of the matched fib entry.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6,
			       const struct sk_buff *skb, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *f6i;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	/* A pure host only accepts routes with a reachable next hop */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	rcu_read_lock();

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	f6i = rt6_select(net, fn, oif, strict);
	if (f6i->rt6i_nsiblings)
		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
	if (f6i == net->ipv6.fib6_null_entry) {
		/* Walk up the tree before relaxing the reachability
		 * requirement and retrying from the original node.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		rcu_read_unlock();
		dst_hold(&rt->dst);
		trace_fib6_table_lookup(net, rt, table, fl6);
		return rt;
	}

	/*Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt, true))
			dst_use_noref(&rt->dst, jiffies);

		rcu_read_unlock();
		trace_fib6_table_lookup(net, rt, table, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(f6i->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		if (ip6_hold_safe(net, &f6i, true)) {
			dst_use_noref(&f6i->dst, jiffies);
		} else {
			rcu_read_unlock();
			uncached_rt = f6i;
			goto uncached_rt_out;
		}
		rcu_read_unlock();

		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
		/* NOTE(review): rt is NULL on this branch (the exception
		 * lookup above missed), so &rt->dst only works because dst
		 * is the first member of struct rt6_info, making this
		 * dst_release() a no-op.  Looks like it was meant to drop
		 * the reference on f6i taken above - verify against the
		 * pre-rename history of this function.
		 */
		dst_release(&rt->dst);

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

uncached_rt_out:
		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		dst_use_noref(&f6i->dst, jiffies);
		local_bh_disable();
		pcpu_rt = rt6_get_pcpu_route(f6i);

		if (!pcpu_rt) {
			/* atomic_inc_not_zero() is needed when using rcu */
			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
				/* No dst_hold() on rt is needed because grabbing
				 * rt->rt6i_ref makes sure rt can't be released.
				 */
				pcpu_rt = rt6_make_pcpu_route(net, f6i);
				rt6_release(f6i);
			} else {
				/* rt is already removed from tree */
				pcpu_rt = net->ipv6.ip6_null_entry;
				dst_hold(&pcpu_rt->dst);
			}
		}
		local_bh_enable();
		rcu_read_unlock();
		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
		return pcpu_rt;
	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
Thomas Grafc71099a2006-08-04 23:20:06 -07001967
David Ahernb75cc8f2018-03-02 08:32:17 -08001968static struct rt6_info *ip6_pol_route_input(struct net *net,
1969 struct fib6_table *table,
1970 struct flowi6 *fl6,
1971 const struct sk_buff *skb,
1972 int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001973{
David Ahernb75cc8f2018-03-02 08:32:17 -08001974 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001975}
1976
Mahesh Bandeward409b842016-09-16 12:59:08 -07001977struct dst_entry *ip6_route_input_lookup(struct net *net,
1978 struct net_device *dev,
David Ahernb75cc8f2018-03-02 08:32:17 -08001979 struct flowi6 *fl6,
1980 const struct sk_buff *skb,
1981 int flags)
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001982{
1983 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1984 flags |= RT6_LOOKUP_F_IFACE;
1985
David Ahernb75cc8f2018-03-02 08:32:17 -08001986 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001987}
Mahesh Bandeward409b842016-09-16 12:59:08 -07001988EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001989
/* Fill @keys with the L3 fields used for multipath hashing of @skb.
 *
 * For ICMPv6 error messages the keys are taken from the embedded
 * (offending) packet header instead of the outer one, so that errors
 * hash onto the same path as the flow they refer to; in that case any
 * pre-dissected @flkeys (which describe the outer packet) are ignored.
 */
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys,
				  struct flow_keys *flkeys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	struct flow_keys *_flkeys = flkeys;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	/* Only error-type ICMPv6 messages embed the original packet */
	icmph = icmp6_hdr(skb);
	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	/* The inner header may be non-linear; copy it out if needed */
	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
	_flkeys = NULL;
out:
	if (_flkeys) {
		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
		keys->tags.flow_label = _flkeys->tags.flow_label;
		keys->basic.ip_proto = _flkeys->basic.ip_proto;
	} else {
		keys->addrs.v6addrs.src = key_iph->saddr;
		keys->addrs.v6addrs.dst = key_iph->daddr;
		keys->tags.flow_label = ip6_flowinfo(key_iph);
		keys->basic.ip_proto = key_iph->nexthdr;
	}
}
2032
/* if skb is set it will be used and fl6 can be NULL
 *
 * Compute the multipath hash according to the per-netns policy:
 *   0 - L3 only (addresses + flow label + protocol),
 *   1 - L4 five-tuple.
 * The result is shifted right by one so the caller always sees a value
 * in the non-negative range of an int (presumably so a reserved/zero
 * encoding is avoided - verify against mp_hash consumers).
 */
u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	struct flow_keys hash_keys;
	u32 mhash;

	/* NOTE(review): no default case - this relies on the sysctl
	 * clamping the policy to {0, 1}; hash_keys would be read
	 * uninitialized for any other value.  Verify the sysctl bounds.
	 */
	switch (ip6_multipath_hash_policy(net)) {
	case 0:
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		if (skb) {
			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
		} else {
			hash_keys.addrs.v6addrs.src = fl6->saddr;
			hash_keys.addrs.v6addrs.dst = fl6->daddr;
			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
			hash_keys.basic.ip_proto = fl6->flowi6_proto;
		}
		break;
	case 1:
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			/* Dissect only if the caller did not already */
			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
			hash_keys.addrs.v6addrs.src = fl6->saddr;
			hash_keys.addrs.v6addrs.dst = fl6->daddr;
			hash_keys.ports.src = fl6->fl6_sport;
			hash_keys.ports.dst = fl6->fl6_dport;
			hash_keys.basic.ip_proto = fl6->flowi6_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	return mhash >> 1;
}
2089
/* Route an incoming skb: build a flowi6 from its IPv6 header (plus
 * tunnel key and multipath hash where applicable) and attach the
 * looked-up dst to the skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};
	struct flow_keys *flkeys = NULL, _flkeys;

	/* Collected (RX-side) tunnel metadata participates in the lookup */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;

	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
		flkeys = &_flkeys;

	/* Pre-compute the multipath hash for ICMPv6 so error packets
	 * follow the same path as the flow they refer to.
	 */
	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
	skb_dst_drop(skb);
	skb_dst_set(skb,
		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
2119
David Ahernb75cc8f2018-03-02 08:32:17 -08002120static struct rt6_info *ip6_pol_route_output(struct net *net,
2121 struct fib6_table *table,
2122 struct flowi6 *fl6,
2123 const struct sk_buff *skb,
2124 int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07002125{
David Ahernb75cc8f2018-03-02 08:32:17 -08002126 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -07002127}
2128
/* Output route lookup for locally originated traffic.
 *
 * Strict-scope destinations (e.g. link-local) are first tried through
 * the bound L3 master device; otherwise the lookup goes through the
 * policy-routing rules with flags derived from @sk and @fl6.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	/* Output path: mark the flow as locally generated */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	/* Force an interface-bound lookup when the socket is bound to a
	 * device, the destination needs strict scope, or an oif was given
	 * without a source address.
	 */
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* No source yet: honour the socket's address preferences */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157
/* Convert @dst_orig into a blackhole route: allocate a new rt6_info that
 * discards all traffic (used e.g. while an XFRM lookup is pending), copy
 * the identifying fields from the original, and release the original.
 * Always consumes the reference on @dst_orig; returns the new dst or
 * ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	/* DST_OBSOLETE_DEAD: never cached, ->check() always fails */
	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_DEAD, 0);
	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);

		new = &rt->dst;
		new->__use = 1;
		/* both directions drop packets silently */
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		/* blackhole is parked on loopback; takes its own idev ref */
		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		/* RTF_PCPU must not leak into a non-pcpu copy */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
David S. Miller14e50e52007-05-24 18:17:54 -07002191
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192/*
2193 * Destination cache support functions
2194 */
2195
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002196static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2197{
Steffen Klassert36143642017-08-25 09:05:42 +02002198 u32 rt_cookie = 0;
Wei Wangc5cff852017-08-21 09:47:10 -07002199
2200 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002201 return NULL;
2202
2203 if (rt6_check_expired(rt))
2204 return NULL;
2205
2206 return &rt->dst;
2207}
2208
2209static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2210{
Martin KaFai Lau5973fb12015-11-11 11:51:07 -08002211 if (!__rt6_check_expired(rt) &&
2212 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
David Miller3a2232e2017-11-28 15:40:40 -05002213 rt6_check(rt->from, cookie))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002214 return &rt->dst;
2215 else
2216 return NULL;
2217}
2218
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2220{
2221 struct rt6_info *rt;
2222
2223 rt = (struct rt6_info *) dst;
2224
Nicolas Dichtel6f3118b2012-09-10 22:09:46 +00002225 /* All IPV6 dsts are created with ->obsolete set to the value
2226 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2227 * into this function always.
2228 */
Hannes Frederic Sowae3bc10b2013-10-24 07:48:24 +02002229
Martin KaFai Lau02bcf4e2015-11-11 11:51:08 -08002230 if (rt->rt6i_flags & RTF_PCPU ||
David Miller3a2232e2017-11-28 15:40:40 -05002231 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002232 return rt6_dst_from_check(rt, cookie);
2233 else
2234 return rt6_check(rt, cookie);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235}
2236
2237static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2238{
2239 struct rt6_info *rt = (struct rt6_info *) dst;
2240
2241 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002242 if (rt->rt6i_flags & RTF_CACHE) {
2243 if (rt6_check_expired(rt)) {
David Ahernafb1d4b52018-04-17 17:33:11 -07002244 ip6_del_rt(dev_net(dst->dev), rt);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002245 dst = NULL;
2246 }
2247 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002249 dst = NULL;
2250 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002252 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253}
2254
/* dst_ops->link_failure: the neighbour for this route is unreachable.
 * Report destination-unreachable to the sender, then either delete the
 * cached route or invalidate the fib node so future lookups re-evaluate.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* dst_hold_safe() guards against a concurrent
			 * teardown already dropping the last reference
			 */
			if (dst_hold_safe(&rt->dst))
				ip6_del_rt(dev_net(rt->dst.dev), rt);
		} else {
			struct fib6_node *fn;

			rcu_read_lock();
			fn = rcu_dereference(rt->rt6i_node);
			/* bumping fn_sernum to -1 invalidates cached
			 * cookies, forcing callers through a fresh lookup;
			 * only done for default routes here
			 */
			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
				fn->fn_sernum = -1;
			rcu_read_unlock();
		}
	}
}
2277
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002278static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2279{
2280 struct net *net = dev_net(rt->dst.dev);
2281
David Ahernd4ead6b2018-04-17 17:33:16 -07002282 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002283 rt->rt6i_flags |= RTF_MODIFIED;
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002284 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2285}
2286
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002287static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2288{
2289 return !(rt->rt6i_flags & RTF_CACHE) &&
Wei Wang4e587ea2017-08-25 15:03:10 -07002290 (rt->rt6i_flags & RTF_PCPU ||
2291 rcu_access_pointer(rt->rt6i_node));
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002292}
2293
/* Core PMTU-update handler. Derives the flow addresses from @iph or @sk
 * (either may be NULL), confirms the neighbour, and if @mtu is smaller
 * than the current path MTU either updates @dst in place or records the
 * new MTU in an exception-cache entry hung off the parent route.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	/* local routes never carry a path MTU */
	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	/* administratively locked MTU must not be overridden */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* prefer addresses from the packet header, fall back to the
	 * socket; without either, only an in-place update is possible
	 */
	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	dst_confirm_neigh(dst, daddr);
	/* never go below the IPv6 minimum MTU of 1280 */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
		/* update rt6_ex->stamp for cache */
		if (rt6->rt6i_flags & RTF_CACHE)
			rt6_update_exception_stamp_rt(rt6);
	} else if (daddr) {
		struct rt6_info *nrt6;

		/* clone the parent and store the learned MTU in the
		 * exception table; on insert failure the clone is freed
		 */
		nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);
			if (rt6_insert_exception(nrt6, rt6->from))
				dst_release_immediate(&nrt6->dst);
		}
	}
}
2337
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002338static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2339 struct sk_buff *skb, u32 mtu)
2340{
2341 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2342}
2343
David S. Miller42ae66c2012-06-15 20:01:57 -07002344void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002345 int oif, u32 mark, kuid_t uid)
David S. Miller81aded22012-06-15 14:54:11 -07002346{
2347 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2348 struct dst_entry *dst;
2349 struct flowi6 fl6;
2350
2351 memset(&fl6, 0, sizeof(fl6));
2352 fl6.flowi6_oif = oif;
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07002353 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
David S. Miller81aded22012-06-15 14:54:11 -07002354 fl6.daddr = iph->daddr;
2355 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002356 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002357 fl6.flowi6_uid = uid;
David S. Miller81aded22012-06-15 14:54:11 -07002358
2359 dst = ip6_route_output(net, NULL, &fl6);
2360 if (!dst->error)
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002361 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
David S. Miller81aded22012-06-15 14:54:11 -07002362 dst_release(dst);
2363}
2364EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2365
/* Socket-oriented PMTU update: apply the new MTU to the socket's flow,
 * then, if the socket's cached dst has become invalid as a result,
 * refresh it (datagram sockets only, and only when the socket is not
 * owned by user context).
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	/* nothing more to do if the cached dst is absent or still valid */
	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	/* serialize against the socket user; v4-mapped destinations are
	 * handled by the IPv4 path, not here
	 */
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2384
/* Cache @dst on @sk, recording which addresses of the flow the route
 * was keyed on: the destination is remembered only if it matches the
 * socket's destination, and (with subtrees) likewise for the source.
 * A NULL address tells ip6_dst_store() not to key on that field.
 */
void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
			   const struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_SUBTREES
	struct ipv6_pinfo *np = inet6_sk(sk);
#endif

	ip6_dst_store(sk, dst,
		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
		      &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
		      &np->saddr :
#endif
		      NULL);
}
2401
/* Handle redirects.
 * Extended flow key for redirect lookups: __ip6_route_redirect() casts
 * the flowi6 it receives back to this struct to recover the gateway
 * that originated the redirect.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* must stay first (cast target) */
	struct in6_addr gateway;	/* router that sent the redirect */
};
2407
2408static struct rt6_info *__ip6_route_redirect(struct net *net,
2409 struct fib6_table *table,
2410 struct flowi6 *fl6,
David Ahernb75cc8f2018-03-02 08:32:17 -08002411 const struct sk_buff *skb,
Duan Jiongb55b76b2013-09-04 19:44:21 +08002412 int flags)
2413{
2414 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
David Ahern23fb93a2018-04-17 17:33:23 -07002415 struct rt6_info *ret = NULL, *rt_cache;
2416 struct rt6_info *rt;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002417 struct fib6_node *fn;
2418
2419 /* Get the "current" route for this destination and
Alexander Alemayhu67c408c2017-01-07 23:53:00 +01002420 * check if the redirect has come from appropriate router.
Duan Jiongb55b76b2013-09-04 19:44:21 +08002421 *
2422 * RFC 4861 specifies that redirects should only be
2423 * accepted if they come from the nexthop to the target.
2424 * Due to the way the routes are chosen, this notion
2425 * is a bit fuzzy and one might need to check all possible
2426 * routes.
2427 */
2428
Wei Wang66f5d6c2017-10-06 12:06:10 -07002429 rcu_read_lock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002430 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2431restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07002432 for_each_fib6_node_rt_rcu(fn) {
David Ahern5e670d82018-04-17 17:33:14 -07002433 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel8067bb82018-01-07 12:45:09 +02002434 continue;
David Ahern14895682018-04-17 17:33:17 -07002435 if (fib6_check_expired(rt))
Duan Jiongb55b76b2013-09-04 19:44:21 +08002436 continue;
David Ahern6edb3c92018-04-17 17:33:15 -07002437 if (rt->rt6i_flags & RTF_REJECT)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002438 break;
2439 if (!(rt->rt6i_flags & RTF_GATEWAY))
2440 continue;
David Ahern5e670d82018-04-17 17:33:14 -07002441 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002442 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002443 /* rt_cache's gateway might be different from its 'parent'
2444 * in the case of an ip redirect.
2445 * So we keep searching in the exception table if the gateway
2446 * is different.
2447 */
David Ahern5e670d82018-04-17 17:33:14 -07002448 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
Wei Wang2b760fc2017-10-06 12:06:03 -07002449 rt_cache = rt6_find_cached_rt(rt,
2450 &fl6->daddr,
2451 &fl6->saddr);
2452 if (rt_cache &&
2453 ipv6_addr_equal(&rdfl->gateway,
2454 &rt_cache->rt6i_gateway)) {
David Ahern23fb93a2018-04-17 17:33:23 -07002455 ret = rt_cache;
Wei Wang2b760fc2017-10-06 12:06:03 -07002456 break;
2457 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002458 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002459 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002460 break;
2461 }
2462
2463 if (!rt)
David Ahern421842e2018-04-17 17:33:18 -07002464 rt = net->ipv6.fib6_null_entry;
David Ahern6edb3c92018-04-17 17:33:15 -07002465 else if (rt->rt6i_flags & RTF_REJECT) {
David Ahern23fb93a2018-04-17 17:33:23 -07002466 ret = net->ipv6.ip6_null_entry;
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002467 goto out;
2468 }
2469
David Ahern421842e2018-04-17 17:33:18 -07002470 if (rt == net->ipv6.fib6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002471 fn = fib6_backtrack(fn, &fl6->saddr);
2472 if (fn)
2473 goto restart;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002474 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002475
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002476out:
David Ahern23fb93a2018-04-17 17:33:23 -07002477 if (ret)
2478 dst_hold(&ret->dst);
2479 else
2480 ret = ip6_create_rt_rcu(rt);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002481
Wei Wang66f5d6c2017-10-06 12:06:10 -07002482 rcu_read_unlock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002483
David Ahern23fb93a2018-04-17 17:33:23 -07002484 trace_fib6_table_lookup(net, ret, table, fl6);
2485 return ret;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002486};
2487
2488static struct dst_entry *ip6_route_redirect(struct net *net,
David Ahernb75cc8f2018-03-02 08:32:17 -08002489 const struct flowi6 *fl6,
2490 const struct sk_buff *skb,
2491 const struct in6_addr *gateway)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002492{
2493 int flags = RT6_LOOKUP_F_HAS_SADDR;
2494 struct ip6rd_flowi rdfl;
2495
2496 rdfl.fl6 = *fl6;
2497 rdfl.gateway = *gateway;
2498
David Ahernb75cc8f2018-03-02 08:32:17 -08002499 return fib6_rule_lookup(net, &rdfl.fl6, skb,
Duan Jiongb55b76b2013-09-04 19:44:21 +08002500 flags, __ip6_route_redirect);
2501}
2502
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002503void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2504 kuid_t uid)
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002505{
2506 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2507 struct dst_entry *dst;
2508 struct flowi6 fl6;
2509
2510 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002511 fl6.flowi6_iif = LOOPBACK_IFINDEX;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002512 fl6.flowi6_oif = oif;
2513 fl6.flowi6_mark = mark;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002514 fl6.daddr = iph->daddr;
2515 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002516 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002517 fl6.flowi6_uid = uid;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002518
David Ahernb75cc8f2018-03-02 08:32:17 -08002519 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002520 rt6_do_redirect(dst, NULL, skb);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002521 dst_release(dst);
2522}
2523EXPORT_SYMBOL_GPL(ip6_redirect);
2524
Duan Jiongc92a59e2013-08-22 12:07:35 +08002525void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2526 u32 mark)
2527{
2528 const struct ipv6hdr *iph = ipv6_hdr(skb);
2529 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2530 struct dst_entry *dst;
2531 struct flowi6 fl6;
2532
2533 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002534 fl6.flowi6_iif = LOOPBACK_IFINDEX;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002535 fl6.flowi6_oif = oif;
2536 fl6.flowi6_mark = mark;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002537 fl6.daddr = msg->dest;
2538 fl6.saddr = iph->daddr;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002539 fl6.flowi6_uid = sock_net_uid(net, NULL);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002540
David Ahernb75cc8f2018-03-02 08:32:17 -08002541 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002542 rt6_do_redirect(dst, NULL, skb);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002543 dst_release(dst);
2544}
2545
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002546void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2547{
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002548 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2549 sk->sk_uid);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002550}
2551EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2552
/* dst_ops->default_advmss: advertised MSS for this route, derived from
 * the path MTU minus IPv6+TCP header overhead, clamped below by the
 * ip6_rt_min_advmss sysctl and capped at IPV6_MAXPLEN (see comment).
 */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
2574
/* dst_ops->mtu: effective MTU for this route. Prefers an explicit
 * RTAX_MTU metric, otherwise the egress device's IPv6 MTU, then caps
 * the result and subtracts any lightweight-tunnel encapsulation
 * headroom.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	struct inet6_dev *idev;
	unsigned int mtu;

	/* a set RTAX_MTU metric overrides the device MTU */
	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
2597
/* Allocate a standalone (non-fib) dst for sending an ICMPv6 packet on
 * @dev toward fl6->daddr. The dst is placed on the uncached list so it
 * can be torn down when the device goes away, then passed through XFRM.
 * Returns the dst or an ERR_PTR on failure.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		/* drop the idev reference taken above */
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	/* hand the idev reference over to the route */
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Add this dst into uncached_list so that rt6_disable_ip() can
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);
	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
2636
/* dst_ops->gc: garbage-collect IPv6 dst entries. Skips work when the
 * last GC ran recently and the entry count is within bounds; otherwise
 * runs the fib GC with an adaptive aggressiveness (ip6_rt_gc_expire)
 * that grows on each pass and decays by the elasticity sysctl.
 * Returns nonzero when the table is still over rt_max_size, telling the
 * allocator to fail.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* decay the aggressiveness so idle periods relax the GC */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
2661
David Ahernd4ead6b2018-04-17 17:33:16 -07002662static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2663 struct fib6_config *cfg)
Florian Westphale715b6d2015-01-05 23:57:44 +01002664{
David Ahernd4ead6b2018-04-17 17:33:16 -07002665 int err = 0;
Florian Westphale715b6d2015-01-05 23:57:44 +01002666
David Ahernd4ead6b2018-04-17 17:33:16 -07002667 if (cfg->fc_mx) {
2668 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2669 GFP_KERNEL);
2670 if (unlikely(!rt->fib6_metrics))
2671 return -ENOMEM;
Florian Westphale715b6d2015-01-05 23:57:44 +01002672
David Ahernd4ead6b2018-04-17 17:33:16 -07002673 refcount_set(&rt->fib6_metrics->refcnt, 1);
Florian Westphale715b6d2015-01-05 23:57:44 +01002674
David Ahernd4ead6b2018-04-17 17:33:16 -07002675 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2676 rt->fib6_metrics->metrics);
Florian Westphale715b6d2015-01-05 23:57:44 +01002677 }
2678
David Ahernd4ead6b2018-04-17 17:33:16 -07002679 return err;
Florian Westphale715b6d2015-01-05 23:57:44 +01002680}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681
David Ahern8c145862016-04-24 21:26:04 -07002682static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2683 struct fib6_config *cfg,
David Ahernf4797b32018-01-25 16:55:08 -08002684 const struct in6_addr *gw_addr,
2685 u32 tbid, int flags)
David Ahern8c145862016-04-24 21:26:04 -07002686{
2687 struct flowi6 fl6 = {
2688 .flowi6_oif = cfg->fc_ifindex,
2689 .daddr = *gw_addr,
2690 .saddr = cfg->fc_prefsrc,
2691 };
2692 struct fib6_table *table;
2693 struct rt6_info *rt;
David Ahern8c145862016-04-24 21:26:04 -07002694
David Ahernf4797b32018-01-25 16:55:08 -08002695 table = fib6_get_table(net, tbid);
David Ahern8c145862016-04-24 21:26:04 -07002696 if (!table)
2697 return NULL;
2698
2699 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2700 flags |= RT6_LOOKUP_F_HAS_SADDR;
2701
David Ahernf4797b32018-01-25 16:55:08 -08002702 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
David Ahernb75cc8f2018-03-02 08:32:17 -08002703 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
David Ahern8c145862016-04-24 21:26:04 -07002704
2705 /* if table lookup failed, fall back to full lookup */
2706 if (rt == net->ipv6.ip6_null_entry) {
2707 ip6_rt_put(rt);
2708 rt = NULL;
2709 }
2710
2711 return rt;
2712}
2713
David Ahernfc1e64e2018-01-25 16:55:09 -08002714static int ip6_route_check_nh_onlink(struct net *net,
2715 struct fib6_config *cfg,
David Ahern9fbb7042018-03-13 08:29:36 -07002716 const struct net_device *dev,
David Ahernfc1e64e2018-01-25 16:55:09 -08002717 struct netlink_ext_ack *extack)
2718{
David Ahern44750f82018-02-06 13:17:06 -08002719 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
David Ahernfc1e64e2018-01-25 16:55:09 -08002720 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2721 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2722 struct rt6_info *grt;
2723 int err;
2724
2725 err = 0;
2726 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2727 if (grt) {
David Ahern58e354c2018-02-06 12:14:12 -08002728 if (!grt->dst.error &&
2729 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
David Ahern44750f82018-02-06 13:17:06 -08002730 NL_SET_ERR_MSG(extack,
2731 "Nexthop has invalid gateway or device mismatch");
David Ahernfc1e64e2018-01-25 16:55:09 -08002732 err = -EINVAL;
2733 }
2734
2735 ip6_rt_put(grt);
2736 }
2737
2738 return err;
2739}
2740
/* Validate a (non-onlink) nexthop gateway: it must be directly
 * reachable (no further gateway hop) via the configured device, or, if
 * no device was given, resolve one and return it through *_dev/*idev
 * (with references held). Returns 0 on success, -EHOSTUNREACH when the
 * gateway cannot be validated.
 */
static int ip6_route_check_nh(struct net *net,
			      struct fib6_config *cfg,
			      struct net_device **_dev,
			      struct inet6_dev **idev)
{
	const struct in6_addr *gw_addr = &cfg->fc_gateway;
	struct net_device *dev = _dev ? *_dev : NULL;
	struct rt6_info *grt = NULL;
	int err = -EHOSTUNREACH;

	/* prefer the route's own table when one was specified */
	if (cfg->fc_table) {
		int flags = RT6_LOOKUP_F_IFACE;

		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
					  cfg->fc_table, flags);
		if (grt) {
			/* discard unusable candidates: a gateway route,
			 * or one via a different device than requested
			 */
			if (grt->rt6i_flags & RTF_GATEWAY ||
			    (dev && dev != grt->dst.dev)) {
				ip6_rt_put(grt);
				grt = NULL;
			}
		}
	}

	/* fall back to a full (all-tables) lookup */
	if (!grt)
		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);

	if (!grt)
		goto out;

	if (dev) {
		if (dev != grt->dst.dev) {
			ip6_rt_put(grt);
			goto out;
		}
	} else {
		/* resolve the egress device from the lookup; hand back
		 * held references to the caller
		 */
		*_dev = dev = grt->dst.dev;
		*idev = grt->rt6i_idev;
		dev_hold(dev);
		in6_dev_hold(grt->rt6i_idev);
	}

	/* the gateway itself must be on-link, not behind another hop */
	if (!(grt->rt6i_flags & RTF_GATEWAY))
		err = 0;

	ip6_rt_put(grt);

out:
	return err;
}
2791
/* Validate the gateway of a route being added: reject local addresses,
 * enforce the link-local-nexthop rule (with the documented exceptions),
 * resolve/verify the egress device, and reject loopback egress.
 * May update *_dev/*idev via ip6_route_check_nh(). Returns 0 or a
 * negative errno with an extack message set.
 */
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
			   struct net_device **_dev, struct inet6_dev **idev,
			   struct netlink_ext_ack *extack)
{
	const struct in6_addr *gw_addr = &cfg->fc_gateway;
	int gwa_type = ipv6_addr_type(gw_addr);
	/* link-local gateways may legitimately match an address on the
	 * egress device itself, so only skip that device in the check
	 */
	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
	const struct net_device *dev = *_dev;
	bool need_addr_check = !dev;
	int err = -EINVAL;

	/* if gw_addr is local we will fail to detect this in case
	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
	 * will return already-added prefix route via interface that
	 * prefix route was assigned to, which might be non-loopback.
	 */
	if (dev &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		goto out;
	}

	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
		/* IPv6 strictly inhibits using not link-local
		 * addresses as nexthop address.
		 * Otherwise, router will not able to send redirects.
		 * It is very good, but in some (rare!) circumstances
		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
		 * some exceptions. --ANK
		 * We allow IPv4-mapped nexthops to support RFC4798-type
		 * addressing
		 */
		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}

		if (cfg->fc_flags & RTNH_F_ONLINK)
			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
		else
			err = ip6_route_check_nh(net, cfg, _dev, idev);

		if (err)
			goto out;
	}

	/* reload in case device was changed */
	dev = *_dev;

	err = -EINVAL;
	if (!dev) {
		NL_SET_ERR_MSG(extack, "Egress device not specified");
		goto out;
	} else if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack,
			       "Egress device can not be loopback device for this route");
		goto out;
	}

	/* if we did not check gw_addr above, do so now that the
	 * egress device has been resolved.
	 */
	if (need_addr_check &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		goto out;
	}

	err = 0;
out:
	return err;
}
2864
/* Allocate and initialise a new FIB6 entry from a route configuration
 * (netlink RTM_NEWROUTE or ioctl).  The entry is NOT inserted into a
 * table here; callers such as ip6_route_add() do the insertion.
 *
 * On success returns the new rt6_info holding references on its device
 * and inet6_dev; on failure returns ERR_PTR(-errno) with every acquired
 * reference released and extack set for netlink callers.
 *
 * NOTE(review): @gfp_flags is not used in this body — presumably kept
 * for interface symmetry with callers; confirm before relying on it.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
					      gfp_t gfp_flags,
					      struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	/* RTF_CACHE is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_CACHE) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
		goto out;
	}

	if (cfg->fc_type > RTN_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid route type");
		goto out;
	}

	/* IPv6 prefix/source lengths can be at most 128 bits */
	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	/* take references on the egress device and its IPv6 state;
	 * released at 'out' on any failure below
	 */
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	/* onlink nexthops require an explicit, up device */
	if (cfg->fc_flags & RTNH_F_ONLINK) {
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device required for onlink");
			err = -ENODEV;
			goto out;
		}

		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		}
	}

	/* without NLM_F_CREATE the table should already exist; warn but
	 * still create it for backwards compatibility
	 */
	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	err = ip6_convert_metrics(net, rt, cfg);
	if (err < 0)
		goto out;

	if (cfg->fc_flags & RTF_EXPIRES)
		fib6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		fib6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* attach lightweight-tunnel encap state if one was requested */
	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate, extack);
		if (err)
			goto out;
		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst_host = true;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;
	rt->fib6_nh.nh_weight = 1;

	rt->fib6_type = cfg->fc_type;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	/* may resolve dev/idev from the gateway when no oif was given */
	if (cfg->fc_flags & RTF_GATEWAY) {
		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
		if (err)
			goto out;

		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (idev->cnf.disable_ipv6) {
		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
		err = -EACCES;
		goto out;
	}

	if (!(dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	}

	/* an explicit preferred source must be a local address on dev */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* routes added while the carrier is down start out linkdown,
	 * except local/anycast ones
	 */
	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
	    !netif_carrier_ok(dev))
		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
	/* transfer the dev/idev references taken above to the new entry */
	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_release_immediate(&rt->dst);

	return ERR_PTR(err);
}
3086
David Ahernacb54e32018-04-17 17:33:22 -07003087int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3088 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003089{
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003090 struct rt6_info *rt;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003091 int err;
3092
David Ahernacb54e32018-04-17 17:33:22 -07003093 rt = ip6_route_info_create(cfg, gfp_flags, extack);
David Ahernd4ead6b2018-04-17 17:33:16 -07003094 if (IS_ERR(rt))
3095 return PTR_ERR(rt);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003096
David Ahernd4ead6b2018-04-17 17:33:16 -07003097 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003098
Linus Torvalds1da177e2005-04-16 15:20:36 -07003099 return err;
3100}
3101
Thomas Graf86872cb2006-08-22 00:01:08 -07003102static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003103{
David Ahernafb1d4b52018-04-17 17:33:11 -07003104 struct net *net = info->nl_net;
Thomas Grafc71099a2006-08-04 23:20:06 -07003105 struct fib6_table *table;
David Ahernafb1d4b52018-04-17 17:33:11 -07003106 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003107
David Ahern421842e2018-04-17 17:33:18 -07003108 if (rt == net->ipv6.fib6_null_entry) {
Gao feng6825a262012-09-19 19:25:34 +00003109 err = -ENOENT;
3110 goto out;
3111 }
Patrick McHardy6c813a72006-08-06 22:22:47 -07003112
Thomas Grafc71099a2006-08-04 23:20:06 -07003113 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003114 spin_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -07003115 err = fib6_del(rt, info);
Wei Wang66f5d6c2017-10-06 12:06:10 -07003116 spin_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003117
Gao feng6825a262012-09-19 19:25:34 +00003118out:
Amerigo Wang94e187c2012-10-29 00:13:19 +00003119 ip6_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003120 return err;
3121}
3122
/* Delete @rt from its table, building a minimal nl_info that carries
 * only the network namespace (no originating netlink message).
 * Drops the caller's reference on @rt via __ip6_del_rt().
 */
int ip6_del_rt(struct net *net, struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = net };

	return __ip6_del_rt(rt, &info);
}
3129
/* Delete @rt and, when cfg->fc_delete_all_nh is set, every sibling
 * next hop of the multipath route as well.
 *
 * Tries to emit a single RTM_DELROUTE notification describing all hops
 * (by pre-building the skb and setting info->skip_notify so fib6_del()
 * does not notify per hop); falls back to per-hop notification if the
 * skb cannot be built.  Drops the caller's reference on @rt.
 */
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
	struct nl_info *info = &cfg->fc_nlinfo;
	struct net *net = info->nl_net;
	struct sk_buff *skb = NULL;
	struct fib6_table *table;
	int err = -ENOENT;

	/* the null entry can never be deleted */
	if (rt == net->ipv6.fib6_null_entry)
		goto out_put;
	table = rt->rt6i_table;
	spin_lock_bh(&table->tb6_lock);

	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
		struct rt6_info *sibling, *next_sibling;

		/* prefer to send a single notification with all hops */
		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
		if (skb) {
			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;

			if (rt6_fill_node(net, skb, rt, NULL,
					  NULL, NULL, 0, RTM_DELROUTE,
					  info->portid, seq, 0) < 0) {
				kfree_skb(skb);
				skb = NULL;
			} else
				info->skip_notify = 1;
		}

		/* _safe variant: fib6_del() unlinks entries as we walk */
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings,
					 rt6i_siblings) {
			err = fib6_del(sibling, info);
			if (err)
				goto out_unlock;
		}
	}

	err = fib6_del(rt, info);
out_unlock:
	spin_unlock_bh(&table->tb6_lock);
out_put:
	ip6_rt_put(rt);

	/* send the combined notification outside the table lock */
	if (skb) {
		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
			    info->nlh, gfp_any());
	}
	return err;
}
3181
David Ahern23fb93a2018-04-17 17:33:23 -07003182static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3183{
3184 int rc = -ESRCH;
3185
3186 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3187 goto out;
3188
3189 if (cfg->fc_flags & RTF_GATEWAY &&
3190 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3191 goto out;
3192 if (dst_hold_safe(&rt->dst))
3193 rc = rt6_remove_exception_rt(rt);
3194out:
3195 return rc;
3196}
3197
David Ahern333c4302017-05-21 10:12:04 -06003198static int ip6_route_del(struct fib6_config *cfg,
3199 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003200{
Wei Wang2b760fc2017-10-06 12:06:03 -07003201 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07003202 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003203 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003204 int err = -ESRCH;
3205
Daniel Lezcano55786892008-03-04 13:47:47 -08003206 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David Ahernd5d531c2017-05-21 10:12:05 -06003207 if (!table) {
3208 NL_SET_ERR_MSG(extack, "FIB table does not exist");
Thomas Grafc71099a2006-08-04 23:20:06 -07003209 return err;
David Ahernd5d531c2017-05-21 10:12:05 -06003210 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003211
Wei Wang66f5d6c2017-10-06 12:06:10 -07003212 rcu_read_lock();
Thomas Grafc71099a2006-08-04 23:20:06 -07003213
3214 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07003215 &cfg->fc_dst, cfg->fc_dst_len,
Wei Wang38fbeee2017-10-06 12:06:02 -07003216 &cfg->fc_src, cfg->fc_src_len,
Wei Wang2b760fc2017-10-06 12:06:03 -07003217 !(cfg->fc_flags & RTF_CACHE));
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003218
Linus Torvalds1da177e2005-04-16 15:20:36 -07003219 if (fn) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003220 for_each_fib6_node_rt_rcu(fn) {
Wei Wang2b760fc2017-10-06 12:06:03 -07003221 if (cfg->fc_flags & RTF_CACHE) {
David Ahern23fb93a2018-04-17 17:33:23 -07003222 int rc;
3223
Wei Wang2b760fc2017-10-06 12:06:03 -07003224 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3225 &cfg->fc_src);
David Ahern23fb93a2018-04-17 17:33:23 -07003226 if (rt_cache) {
3227 rc = ip6_del_cached_rt(rt_cache, cfg);
3228 if (rc != -ESRCH)
3229 return rc;
3230 }
3231 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07003232 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003233 if (cfg->fc_ifindex &&
David Ahern5e670d82018-04-17 17:33:14 -07003234 (!rt->fib6_nh.nh_dev ||
3235 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003236 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003237 if (cfg->fc_flags & RTF_GATEWAY &&
David Ahern5e670d82018-04-17 17:33:14 -07003238 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003239 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003240 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003241 continue;
Mantas Mc2ed1882016-12-16 10:30:59 +02003242 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3243 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003244 if (!dst_hold_safe(&rt->dst))
3245 break;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003246 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003247
David Ahern0ae81332017-02-02 12:37:08 -08003248 /* if gateway was specified only delete the one hop */
3249 if (cfg->fc_flags & RTF_GATEWAY)
3250 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3251
3252 return __ip6_del_rt_siblings(rt, cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003253 }
3254 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003255 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003256
3257 return err;
3258}
3259
/* Process a received ICMPv6 Redirect for @dst (see RFC 2461 sec 8):
 * validate the message and its ND options, update the neighbour cache
 * entry for the new first hop, insert a cached route (exception entry)
 * steering msg->dest via the redirect target, and fire the netevent
 * notifier so interested subsystems (e.g. offload drivers) see the
 * path change.  Invalid or unacceptable redirects are dropped silently
 * apart from a ratelimited debug message.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* ND options follow the fixed rd_msg header */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link;
	 * otherwise the target must be a link-local router address
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* routers and interfaces configured to ignore redirects drop them */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	/* build the cached route to msg->dest via the redirect target */
	nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_protocol = RTPROT_REDIRECT;
	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	/* No need to remove rt from the exception table if rt is
	 * a cached route because rt6_insert_exception() will
	 * takes care of it
	 */
	if (rt6_insert_exception(nrt, rt->from)) {
		dst_release_immediate(&nrt->dst);
		goto out;
	}

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

out:
	neigh_release(neigh);
}
3377
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003378#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003379static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003380 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003381 const struct in6_addr *gwaddr,
3382 struct net_device *dev)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003383{
David Ahern830218c2016-10-24 10:52:35 -07003384 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3385 int ifindex = dev->ifindex;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003386 struct fib6_node *fn;
3387 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07003388 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003389
David Ahern830218c2016-10-24 10:52:35 -07003390 table = fib6_get_table(net, tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003391 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003392 return NULL;
3393
Wei Wang66f5d6c2017-10-06 12:06:10 -07003394 rcu_read_lock();
Wei Wang38fbeee2017-10-06 12:06:02 -07003395 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003396 if (!fn)
3397 goto out;
3398
Wei Wang66f5d6c2017-10-06 12:06:10 -07003399 for_each_fib6_node_rt_rcu(fn) {
David Ahern5e670d82018-04-17 17:33:14 -07003400 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003401 continue;
3402 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3403 continue;
David Ahern5e670d82018-04-17 17:33:14 -07003404 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003405 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003406 ip6_hold_safe(NULL, &rt, false);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003407 break;
3408 }
3409out:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003410 rcu_read_unlock();
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003411 return rt;
3412}
3413
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003414static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003415 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003416 const struct in6_addr *gwaddr,
3417 struct net_device *dev,
Eric Dumazet95c96172012-04-15 05:58:06 +00003418 unsigned int pref)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003419{
Thomas Graf86872cb2006-08-22 00:01:08 -07003420 struct fib6_config cfg = {
Rami Rosen238fc7e2008-02-09 23:43:11 -08003421 .fc_metric = IP6_RT_PRIO_USER,
David Ahern830218c2016-10-24 10:52:35 -07003422 .fc_ifindex = dev->ifindex,
Thomas Graf86872cb2006-08-22 00:01:08 -07003423 .fc_dst_len = prefixlen,
3424 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3425 RTF_UP | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003426 .fc_protocol = RTPROT_RA,
David Aherne8478e82018-04-17 17:33:13 -07003427 .fc_type = RTN_UNICAST,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003428 .fc_nlinfo.portid = 0,
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003429 .fc_nlinfo.nlh = NULL,
3430 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003431 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003432
David Ahern830218c2016-10-24 10:52:35 -07003433 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003434 cfg.fc_dst = *prefix;
3435 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07003436
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08003437 /* We should treat it as a default route if prefix length is 0. */
3438 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07003439 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003440
David Ahernacb54e32018-04-17 17:33:22 -07003441 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003442
David Ahern830218c2016-10-24 10:52:35 -07003443 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003444}
3445#endif
3446
/* Look up the RA-learned default route (RTF_ADDRCONF|RTF_DEFAULT) via
 * gateway @addr on @dev.  Returns the entry with a reference held
 * (ip6_hold_safe), or NULL if no such route exists.
 */
struct rt6_info *rt6_get_dflt_router(struct net *net,
				     const struct in6_addr *addr,
				     struct net_device *dev)
{
	/* VRF-aware: use the l3mdev table when dev is enslaved */
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	rcu_read_lock();
	/* rt is NULL when the walk exhausts without a match */
	for_each_fib6_node_rt_rcu(&table->tb6_root) {
		if (dev == rt->fib6_nh.nh_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
			break;
	}
	if (rt)
		ip6_hold_safe(NULL, &rt, false);
	rcu_read_unlock();
	return rt;
}
3471
David Ahernafb1d4b52018-04-17 17:33:11 -07003472struct rt6_info *rt6_add_dflt_router(struct net *net,
3473 const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08003474 struct net_device *dev,
3475 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003476{
Thomas Graf86872cb2006-08-22 00:01:08 -07003477 struct fib6_config cfg = {
David Ahernca254492015-10-12 11:47:10 -07003478 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08003479 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07003480 .fc_ifindex = dev->ifindex,
3481 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3482 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003483 .fc_protocol = RTPROT_RA,
David Aherne8478e82018-04-17 17:33:13 -07003484 .fc_type = RTN_UNICAST,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003485 .fc_nlinfo.portid = 0,
Daniel Lezcano55786892008-03-04 13:47:47 -08003486 .fc_nlinfo.nlh = NULL,
David Ahernafb1d4b52018-04-17 17:33:11 -07003487 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003488 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003489
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003490 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003491
David Ahernacb54e32018-04-17 17:33:22 -07003492 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
David Ahern830218c2016-10-24 10:52:35 -07003493 struct fib6_table *table;
3494
3495 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3496 if (table)
3497 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3498 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003499
David Ahernafb1d4b52018-04-17 17:33:11 -07003500 return rt6_get_dflt_router(net, gwaddr, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003501}
3502
David Ahernafb1d4b52018-04-17 17:33:11 -07003503static void __rt6_purge_dflt_routers(struct net *net,
3504 struct fib6_table *table)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003505{
3506 struct rt6_info *rt;
3507
3508restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003509 rcu_read_lock();
3510 for_each_fib6_node_rt_rcu(&table->tb6_root) {
Lorenzo Colitti3e8b0ac2013-03-03 20:46:46 +00003511 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3512 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
Wei Wangd3843fe2017-10-06 12:06:06 -07003513 if (dst_hold_safe(&rt->dst)) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003514 rcu_read_unlock();
David Ahernafb1d4b52018-04-17 17:33:11 -07003515 ip6_del_rt(net, rt);
Wei Wangd3843fe2017-10-06 12:06:06 -07003516 } else {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003517 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07003518 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003519 goto restart;
3520 }
3521 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003522 rcu_read_unlock();
David Ahern830218c2016-10-24 10:52:35 -07003523
3524 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3525}
3526
/* Purge RA default routers from every fib table in @net that is
 * flagged RT6_TABLE_HAS_DFLT_ROUTER; untouched tables are skipped.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();

	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
				__rt6_purge_dflt_routers(net, table);
		}
	}

	rcu_read_unlock();
}
3545
Daniel Lezcano55786892008-03-04 13:47:47 -08003546static void rtmsg_to_fib6_config(struct net *net,
3547 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07003548 struct fib6_config *cfg)
3549{
3550 memset(cfg, 0, sizeof(*cfg));
3551
David Ahernca254492015-10-12 11:47:10 -07003552 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3553 : RT6_TABLE_MAIN;
Thomas Graf86872cb2006-08-22 00:01:08 -07003554 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3555 cfg->fc_metric = rtmsg->rtmsg_metric;
3556 cfg->fc_expires = rtmsg->rtmsg_info;
3557 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3558 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3559 cfg->fc_flags = rtmsg->rtmsg_flags;
David Aherne8478e82018-04-17 17:33:13 -07003560 cfg->fc_type = rtmsg->rtmsg_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07003561
Daniel Lezcano55786892008-03-04 13:47:47 -08003562 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08003563
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003564 cfg->fc_dst = rtmsg->rtmsg_dst;
3565 cfg->fc_src = rtmsg->rtmsg_src;
3566 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07003567}
3568
Daniel Lezcano55786892008-03-04 13:47:47 -08003569int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003570{
Thomas Graf86872cb2006-08-22 00:01:08 -07003571 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003572 struct in6_rtmsg rtmsg;
3573 int err;
3574
Ian Morris67ba4152014-08-24 21:53:10 +01003575 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003576 case SIOCADDRT: /* Add a route */
3577 case SIOCDELRT: /* Delete a route */
Eric W. Biedermanaf31f412012-11-16 03:03:06 +00003578 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003579 return -EPERM;
3580 err = copy_from_user(&rtmsg, arg,
3581 sizeof(struct in6_rtmsg));
3582 if (err)
3583 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07003584
Daniel Lezcano55786892008-03-04 13:47:47 -08003585 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07003586
Linus Torvalds1da177e2005-04-16 15:20:36 -07003587 rtnl_lock();
3588 switch (cmd) {
3589 case SIOCADDRT:
David Ahernacb54e32018-04-17 17:33:22 -07003590 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003591 break;
3592 case SIOCDELRT:
David Ahern333c4302017-05-21 10:12:04 -06003593 err = ip6_route_del(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003594 break;
3595 default:
3596 err = -EINVAL;
3597 }
3598 rtnl_unlock();
3599
3600 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07003601 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602
3603 return -EINVAL;
3604}
3605
3606/*
3607 * Drop the packet on the floor
3608 */
3609
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07003610static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003611{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003612 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00003613 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003614 switch (ipstats_mib_noroutes) {
3615 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07003616 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00003617 if (type == IPV6_ADDR_ANY) {
Stephen Suryaputrabdb7cc62018-04-16 13:42:16 -04003618 IP6_INC_STATS(dev_net(dst->dev),
3619 __in6_dev_get_safely(skb->dev),
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003620 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003621 break;
3622 }
3623 /* FALLTHROUGH */
3624 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003625 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3626 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003627 break;
3628 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00003629 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003630 kfree_skb(skb);
3631 return 0;
3632}
3633
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003634static int ip6_pkt_discard(struct sk_buff *skb)
3635{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003636 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003637}
3638
Eric W. Biedermanede20592015-10-07 16:48:47 -05003639static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003640{
Eric Dumazetadf30902009-06-02 05:19:30 +00003641 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003642 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003643}
3644
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003645static int ip6_pkt_prohibit(struct sk_buff *skb)
3646{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003647 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003648}
3649
Eric W. Biedermanede20592015-10-07 16:48:47 -05003650static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003651{
Eric Dumazetadf30902009-06-02 05:19:30 +00003652 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003653 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003654}
3655
Linus Torvalds1da177e2005-04-16 15:20:36 -07003656/*
3657 * Allocate a dst for local (unicast / anycast) address.
3658 */
3659
David Ahernafb1d4b52018-04-17 17:33:11 -07003660struct rt6_info *addrconf_dst_alloc(struct net *net,
3661 struct inet6_dev *idev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003662 const struct in6_addr *addr,
David Ahernacb54e32018-04-17 17:33:22 -07003663 bool anycast, gfp_t gfp_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003664{
David Ahernca254492015-10-12 11:47:10 -07003665 u32 tb_id;
David Ahern4832c302017-08-17 12:17:20 -07003666 struct net_device *dev = idev->dev;
David Ahern5f02ce242016-09-10 12:09:54 -07003667 struct rt6_info *rt;
3668
David Ahern5f02ce242016-09-10 12:09:54 -07003669 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
Hannes Frederic Sowaa3300ef2013-12-07 03:33:45 +01003670 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003671 return ERR_PTR(-ENOMEM);
3672
David Ahern3b6761d2018-04-17 17:33:20 -07003673 rt->dst_nocount = true;
3674
Linus Torvalds1da177e2005-04-16 15:20:36 -07003675 in6_dev_hold(idev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003676 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003677
David Ahern3b6761d2018-04-17 17:33:20 -07003678 rt->dst_host = true;
David Ahern94b5e0f2017-02-02 08:52:21 -08003679 rt->rt6i_protocol = RTPROT_KERNEL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003680 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
David Aherne8478e82018-04-17 17:33:13 -07003681 if (anycast) {
3682 rt->fib6_type = RTN_ANYCAST;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09003683 rt->rt6i_flags |= RTF_ANYCAST;
David Aherne8478e82018-04-17 17:33:13 -07003684 } else {
3685 rt->fib6_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003686 rt->rt6i_flags |= RTF_LOCAL;
David Aherne8478e82018-04-17 17:33:13 -07003687 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003688
David Ahern5e670d82018-04-17 17:33:14 -07003689 rt->fib6_nh.nh_gw = *addr;
3690 rt->fib6_nh.nh_dev = dev;
Julian Anastasov550bab42013-10-20 15:43:04 +03003691 rt->rt6i_gateway = *addr;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003692 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003693 rt->rt6i_dst.plen = 128;
David Ahernca254492015-10-12 11:47:10 -07003694 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3695 rt->rt6i_table = fib6_get_table(net, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003696
Linus Torvalds1da177e2005-04-16 15:20:36 -07003697 return rt;
3698}
3699
Daniel Walterc3968a82011-04-13 21:10:57 +00003700/* remove deleted ip from prefsrc entries */
3701struct arg_dev_net_ip {
3702 struct net_device *dev;
3703 struct net *net;
3704 struct in6_addr *addr;
3705};
3706
/* fib6_clean_all() callback for rt6_remove_prefsrc(): clear the
 * preferred-source address (and the matching state in the route's
 * exception cache) on routes that referenced the deleted address.
 * Always returns 0 (never asks the walker to delete the route).
 */
static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	/* NULL dev matches any device; never touch the null entry */
	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
	    rt != net->ipv6.fib6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		spin_lock_bh(&rt6_exception_lock);
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
		/* need to update cache as well */
		rt6_exceptions_remove_prefsrc(rt);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}
3725
3726void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3727{
3728 struct net *net = dev_net(ifp->idev->dev);
3729 struct arg_dev_net_ip adni = {
3730 .dev = ifp->idev->dev,
3731 .net = net,
3732 .addr = &ifp->addr,
3733 };
Li RongQing0c3584d2013-12-27 16:32:38 +08003734 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
Daniel Walterc3968a82011-04-13 21:10:57 +00003735}
3736
Duan Jiongbe7a0102014-05-15 15:56:14 +08003737#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
Duan Jiongbe7a0102014-05-15 15:56:14 +08003738
/* Remove routers and update dst entries when a gateway turns into a host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	/* NOTE(review): the -1 return appears to ask the fib walker to
	 * delete this route (same convention as fib6_ifdown) — confirm
	 * against fib6_clean_node() in ip6_fib.c.
	 */
	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
		return -1;
	}

	/* Further clean up cached routes in exception table.
	 * This is needed because cached route may have a different
	 * gateway than its 'parent' in the case of an ip redirect.
	 */
	rt6_exceptions_clean_tohost(rt, gateway);

	return 0;
}
3757
/* Purge router entries (and cached exceptions) whose gateway @gateway
 * has become a directly-reachable host; see fib6_clean_tohost().
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3762
Ido Schimmel2127d952018-01-07 12:45:03 +02003763struct arg_netdev_event {
3764 const struct net_device *dev;
Ido Schimmel4c981e22018-01-07 12:45:04 +02003765 union {
3766 unsigned int nh_flags;
3767 unsigned long event;
3768 };
Ido Schimmel2127d952018-01-07 12:45:03 +02003769};
3770
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003771static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3772{
3773 struct rt6_info *iter;
3774 struct fib6_node *fn;
3775
3776 fn = rcu_dereference_protected(rt->rt6i_node,
3777 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3778 iter = rcu_dereference_protected(fn->leaf,
3779 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3780 while (iter) {
3781 if (iter->rt6i_metric == rt->rt6i_metric &&
3782 rt6_qualify_for_ecmp(iter))
3783 return iter;
3784 iter = rcu_dereference_protected(iter->rt6_next,
3785 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3786 }
3787
3788 return NULL;
3789}
3790
3791static bool rt6_is_dead(const struct rt6_info *rt)
3792{
David Ahern5e670d82018-04-17 17:33:14 -07003793 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3794 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003795 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3796 return true;
3797
3798 return false;
3799}
3800
3801static int rt6_multipath_total_weight(const struct rt6_info *rt)
3802{
3803 struct rt6_info *iter;
3804 int total = 0;
3805
3806 if (!rt6_is_dead(rt))
David Ahern5e670d82018-04-17 17:33:14 -07003807 total += rt->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003808
3809 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3810 if (!rt6_is_dead(iter))
David Ahern5e670d82018-04-17 17:33:14 -07003811 total += iter->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003812 }
3813
3814 return total;
3815}
3816
3817static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3818{
3819 int upper_bound = -1;
3820
3821 if (!rt6_is_dead(rt)) {
David Ahern5e670d82018-04-17 17:33:14 -07003822 *weight += rt->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003823 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3824 total) - 1;
3825 }
David Ahern5e670d82018-04-17 17:33:14 -07003826 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003827}
3828
3829static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3830{
3831 struct rt6_info *iter;
3832 int weight = 0;
3833
3834 rt6_upper_bound_set(rt, &weight, total);
3835
3836 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3837 rt6_upper_bound_set(iter, &weight, total);
3838}
3839
3840void rt6_multipath_rebalance(struct rt6_info *rt)
3841{
3842 struct rt6_info *first;
3843 int total;
3844
3845 /* In case the entire multipath route was marked for flushing,
3846 * then there is no need to rebalance upon the removal of every
3847 * sibling route.
3848 */
3849 if (!rt->rt6i_nsiblings || rt->should_flush)
3850 return;
3851
3852 /* During lookup routes are evaluated in order, so we need to
3853 * make sure upper bounds are assigned from the first sibling
3854 * onwards.
3855 */
3856 first = rt6_multipath_first_sibling(rt);
3857 if (WARN_ON_ONCE(!first))
3858 return;
3859
3860 total = rt6_multipath_total_weight(first);
3861 rt6_multipath_upper_bound_set(first, total);
3862}
3863
Ido Schimmel2127d952018-01-07 12:45:03 +02003864static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3865{
3866 const struct arg_netdev_event *arg = p_arg;
David Ahern7aef6852018-04-17 17:33:10 -07003867 struct net *net = dev_net(arg->dev);
Ido Schimmel2127d952018-01-07 12:45:03 +02003868
David Ahern421842e2018-04-17 17:33:18 -07003869 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
David Ahern5e670d82018-04-17 17:33:14 -07003870 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
David Ahern7aef6852018-04-17 17:33:10 -07003871 fib6_update_sernum_upto_root(net, rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003872 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003873 }
Ido Schimmel2127d952018-01-07 12:45:03 +02003874
3875 return 0;
3876}
3877
/* Clear @nh_flags on every route through @dev, e.g. when the device
 * comes back up.  When RTNH_F_DEAD is being cleared and the carrier is
 * up, RTNH_F_LINKDOWN is cleared along with it.
 */
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
{
	struct arg_netdev_event arg = {
		.dev = dev,
		{
			/* positional init selects the union's first member */
			.nh_flags = nh_flags,
		},
	};

	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
		arg.nh_flags |= RTNH_F_LINKDOWN;

	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
3892
Ido Schimmel1de178e2018-01-07 12:45:15 +02003893static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3894 const struct net_device *dev)
3895{
3896 struct rt6_info *iter;
3897
David Ahern5e670d82018-04-17 17:33:14 -07003898 if (rt->fib6_nh.nh_dev == dev)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003899 return true;
3900 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003901 if (iter->fib6_nh.nh_dev == dev)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003902 return true;
3903
3904 return false;
3905}
3906
3907static void rt6_multipath_flush(struct rt6_info *rt)
3908{
3909 struct rt6_info *iter;
3910
3911 rt->should_flush = 1;
3912 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3913 iter->should_flush = 1;
3914}
3915
3916static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3917 const struct net_device *down_dev)
3918{
3919 struct rt6_info *iter;
3920 unsigned int dead = 0;
3921
David Ahern5e670d82018-04-17 17:33:14 -07003922 if (rt->fib6_nh.nh_dev == down_dev ||
3923 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003924 dead++;
3925 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003926 if (iter->fib6_nh.nh_dev == down_dev ||
3927 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003928 dead++;
3929
3930 return dead;
3931}
3932
3933static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3934 const struct net_device *dev,
3935 unsigned int nh_flags)
3936{
3937 struct rt6_info *iter;
3938
David Ahern5e670d82018-04-17 17:33:14 -07003939 if (rt->fib6_nh.nh_dev == dev)
3940 rt->fib6_nh.nh_flags |= nh_flags;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003941 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003942 if (iter->fib6_nh.nh_dev == dev)
3943 iter->fib6_nh.nh_flags |= nh_flags;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003944}
3945
David Aherna1a22c12017-01-18 07:40:36 -08003946/* called with write lock held for table with rt */
Ido Schimmel4c981e22018-01-07 12:45:04 +02003947static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003948{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003949 const struct arg_netdev_event *arg = p_arg;
3950 const struct net_device *dev = arg->dev;
David Ahern7aef6852018-04-17 17:33:10 -07003951 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003952
David Ahern421842e2018-04-17 17:33:18 -07003953 if (rt == net->ipv6.fib6_null_entry)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003954 return 0;
3955
3956 switch (arg->event) {
3957 case NETDEV_UNREGISTER:
David Ahern5e670d82018-04-17 17:33:14 -07003958 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003959 case NETDEV_DOWN:
Ido Schimmel1de178e2018-01-07 12:45:15 +02003960 if (rt->should_flush)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003961 return -1;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003962 if (!rt->rt6i_nsiblings)
David Ahern5e670d82018-04-17 17:33:14 -07003963 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003964 if (rt6_multipath_uses_dev(rt, dev)) {
3965 unsigned int count;
3966
3967 count = rt6_multipath_dead_count(rt, dev);
3968 if (rt->rt6i_nsiblings + 1 == count) {
3969 rt6_multipath_flush(rt);
3970 return -1;
3971 }
3972 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3973 RTNH_F_LINKDOWN);
David Ahern7aef6852018-04-17 17:33:10 -07003974 fib6_update_sernum(net, rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003975 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003976 }
3977 return -2;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003978 case NETDEV_CHANGE:
David Ahern5e670d82018-04-17 17:33:14 -07003979 if (rt->fib6_nh.nh_dev != dev ||
Ido Schimmel1de178e2018-01-07 12:45:15 +02003980 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003981 break;
David Ahern5e670d82018-04-17 17:33:14 -07003982 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003983 rt6_multipath_rebalance(rt);
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003984 break;
Ido Schimmel2b241362018-01-07 12:45:02 +02003985 }
David S. Millerc159d302011-12-26 15:24:36 -05003986
Linus Torvalds1da177e2005-04-16 15:20:36 -07003987 return 0;
3988}
3989
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003990void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003991{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003992 struct arg_netdev_event arg = {
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003993 .dev = dev,
Ido Schimmel6802f3a2018-01-12 22:07:36 +02003994 {
3995 .event = event,
3996 },
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003997 };
3998
Ido Schimmel4c981e22018-01-07 12:45:04 +02003999 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4000}
4001
/* Take @dev out of IPv6 service for @event: sync the FIB (mark or
 * remove its routes), flush uncached-list entries referencing it, then
 * tear down its IPv6 neighbour cache entries.
 */
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	neigh_ifdown(&nd_tbl, dev);
}
4008
Eric Dumazet95c96172012-04-15 05:58:06 +00004009struct rt6_mtu_change_arg {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004010 struct net_device *dev;
Eric Dumazet95c96172012-04-15 05:58:06 +00004011 unsigned int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004012};
4013
/* fib6_clean_all() callback for rt6_mtu_change(): propagate a device
 * MTU change to routes over that device and to their cached exception
 * routes, unless the route's RTAX_MTU metric is locked.
 * Always returns 0 (routes are updated in place, never deleted).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	if (rt->fib6_nh.nh_dev == arg->dev &&
	    !fib6_metric_locked(rt, RTAX_MTU)) {
		u32 mtu = rt->fib6_pmtu;

		/* only shrink the PMTU when it tracked the old link MTU */
		if (mtu >= arg->mtu ||
		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
			fib6_metric_set(rt, RTAX_MTU, arg->mtu);

		spin_lock_bh(&rt6_exception_lock);
		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
		spin_unlock_bh(&rt6_exception_lock);
	}
	return 0;
}
4048
Eric Dumazet95c96172012-04-15 05:58:06 +00004049void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004050{
Thomas Grafc71099a2006-08-04 23:20:06 -07004051 struct rt6_mtu_change_arg arg = {
4052 .dev = dev,
4053 .mtu = mtu,
4054 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07004055
Li RongQing0c3584d2013-12-27 16:32:38 +08004056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004057}
4058
Patrick McHardyef7c79e2007-06-05 12:38:30 -07004059static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07004060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07004061 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07004062 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07004063 [RTA_PRIORITY] = { .type = NLA_U32 },
4064 [RTA_METRICS] = { .type = NLA_NESTED },
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004065 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004066 [RTA_PREF] = { .type = NLA_U8 },
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004067 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4068 [RTA_ENCAP] = { .type = NLA_NESTED },
Xin Long32bc2012015-12-16 17:50:11 +08004069 [RTA_EXPIRES] = { .type = NLA_U32 },
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09004070 [RTA_UID] = { .type = NLA_U32 },
Liping Zhang3b45a412017-02-27 20:59:39 +08004071 [RTA_MARK] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07004072};
4073
4074static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
David Ahern333c4302017-05-21 10:12:04 -06004075 struct fib6_config *cfg,
4076 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004077{
Thomas Graf86872cb2006-08-22 00:01:08 -07004078 struct rtmsg *rtm;
4079 struct nlattr *tb[RTA_MAX+1];
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004080 unsigned int pref;
Thomas Graf86872cb2006-08-22 00:01:08 -07004081 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004082
Johannes Bergfceb6432017-04-12 14:34:07 +02004083 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4084 NULL);
Thomas Graf86872cb2006-08-22 00:01:08 -07004085 if (err < 0)
4086 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004087
Thomas Graf86872cb2006-08-22 00:01:08 -07004088 err = -EINVAL;
4089 rtm = nlmsg_data(nlh);
4090 memset(cfg, 0, sizeof(*cfg));
4091
4092 cfg->fc_table = rtm->rtm_table;
4093 cfg->fc_dst_len = rtm->rtm_dst_len;
4094 cfg->fc_src_len = rtm->rtm_src_len;
4095 cfg->fc_flags = RTF_UP;
4096 cfg->fc_protocol = rtm->rtm_protocol;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004097 cfg->fc_type = rtm->rtm_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07004098
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004099 if (rtm->rtm_type == RTN_UNREACHABLE ||
4100 rtm->rtm_type == RTN_BLACKHOLE ||
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00004101 rtm->rtm_type == RTN_PROHIBIT ||
4102 rtm->rtm_type == RTN_THROW)
Thomas Graf86872cb2006-08-22 00:01:08 -07004103 cfg->fc_flags |= RTF_REJECT;
4104
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00004105 if (rtm->rtm_type == RTN_LOCAL)
4106 cfg->fc_flags |= RTF_LOCAL;
4107
Martin KaFai Lau1f56a012015-04-28 13:03:03 -07004108 if (rtm->rtm_flags & RTM_F_CLONED)
4109 cfg->fc_flags |= RTF_CACHE;
4110
David Ahernfc1e64e2018-01-25 16:55:09 -08004111 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4112
Eric W. Biederman15e47302012-09-07 20:12:54 +00004113 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
Thomas Graf86872cb2006-08-22 00:01:08 -07004114 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09004115 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07004116
4117 if (tb[RTA_GATEWAY]) {
Jiri Benc67b61f62015-03-29 16:59:26 +02004118 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
Thomas Graf86872cb2006-08-22 00:01:08 -07004119 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004120 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004121
4122 if (tb[RTA_DST]) {
4123 int plen = (rtm->rtm_dst_len + 7) >> 3;
4124
4125 if (nla_len(tb[RTA_DST]) < plen)
4126 goto errout;
4127
4128 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004129 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004130
4131 if (tb[RTA_SRC]) {
4132 int plen = (rtm->rtm_src_len + 7) >> 3;
4133
4134 if (nla_len(tb[RTA_SRC]) < plen)
4135 goto errout;
4136
4137 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004138 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004139
Daniel Walterc3968a82011-04-13 21:10:57 +00004140 if (tb[RTA_PREFSRC])
Jiri Benc67b61f62015-03-29 16:59:26 +02004141 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
Daniel Walterc3968a82011-04-13 21:10:57 +00004142
Thomas Graf86872cb2006-08-22 00:01:08 -07004143 if (tb[RTA_OIF])
4144 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4145
4146 if (tb[RTA_PRIORITY])
4147 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4148
4149 if (tb[RTA_METRICS]) {
4150 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4151 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004152 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004153
4154 if (tb[RTA_TABLE])
4155 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4156
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004157 if (tb[RTA_MULTIPATH]) {
4158 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4159 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
David Ahern9ed59592017-01-17 14:57:36 -08004160
4161 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
David Ahernc255bd62017-05-27 16:19:27 -06004162 cfg->fc_mp_len, extack);
David Ahern9ed59592017-01-17 14:57:36 -08004163 if (err < 0)
4164 goto errout;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004165 }
4166
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004167 if (tb[RTA_PREF]) {
4168 pref = nla_get_u8(tb[RTA_PREF]);
4169 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4170 pref != ICMPV6_ROUTER_PREF_HIGH)
4171 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4172 cfg->fc_flags |= RTF_PREF(pref);
4173 }
4174
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004175 if (tb[RTA_ENCAP])
4176 cfg->fc_encap = tb[RTA_ENCAP];
4177
David Ahern9ed59592017-01-17 14:57:36 -08004178 if (tb[RTA_ENCAP_TYPE]) {
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004179 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4180
David Ahernc255bd62017-05-27 16:19:27 -06004181 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
David Ahern9ed59592017-01-17 14:57:36 -08004182 if (err < 0)
4183 goto errout;
4184 }
4185
Xin Long32bc2012015-12-16 17:50:11 +08004186 if (tb[RTA_EXPIRES]) {
4187 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4188
4189 if (addrconf_finite_timeout(timeout)) {
4190 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4191 cfg->fc_flags |= RTF_EXPIRES;
4192 }
4193 }
4194
Thomas Graf86872cb2006-08-22 00:01:08 -07004195 err = 0;
4196errout:
4197 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004198}
4199
/* One pending nexthop of a multipath request, queued on a local list by
 * ip6_route_info_append() before the routes are inserted in one pass.
 */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route created for this nexthop; NULL once inserted/freed */
	struct fib6_config r_cfg;	/* per-nexthop config (used for rollback via ip6_route_del) */
	struct list_head next;		/* linkage on the caller's rt6_nh_list */
};
4205
4206static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4207{
4208 struct rt6_nh *nh;
4209
4210 list_for_each_entry(nh, rt6_nh_list, next) {
David Ahern7d4d5062017-02-02 12:37:12 -08004211 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004212 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4213 nh->r_cfg.fc_ifindex);
4214 }
4215}
4216
David Ahernd4ead6b2018-04-17 17:33:16 -07004217static int ip6_route_info_append(struct net *net,
4218 struct list_head *rt6_nh_list,
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004219 struct rt6_info *rt, struct fib6_config *r_cfg)
4220{
4221 struct rt6_nh *nh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004222 int err = -EEXIST;
4223
4224 list_for_each_entry(nh, rt6_nh_list, next) {
4225 /* check if rt6_info already exists */
David Ahernf06b7542017-07-05 14:41:46 -06004226 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004227 return err;
4228 }
4229
4230 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4231 if (!nh)
4232 return -ENOMEM;
4233 nh->rt6_info = rt;
David Ahernd4ead6b2018-04-17 17:33:16 -07004234 err = ip6_convert_metrics(net, rt, r_cfg);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004235 if (err) {
4236 kfree(nh);
4237 return err;
4238 }
4239 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4240 list_add_tail(&nh->next, rt6_nh_list);
4241
4242 return 0;
4243}
4244
David Ahern3b1137f2017-02-02 12:37:10 -08004245static void ip6_route_mpath_notify(struct rt6_info *rt,
4246 struct rt6_info *rt_last,
4247 struct nl_info *info,
4248 __u16 nlflags)
4249{
4250 /* if this is an APPEND route, then rt points to the first route
4251 * inserted and rt_last points to last route inserted. Userspace
4252 * wants a consistent dump of the route which starts at the first
4253 * nexthop. Since sibling routes are always added at the end of
4254 * the list, find the first sibling of the last route appended
4255 */
4256 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4257 rt = list_first_entry(&rt_last->rt6i_siblings,
4258 struct rt6_info,
4259 rt6i_siblings);
4260 }
4261
4262 if (rt)
4263 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4264}
4265
/* Install a multipath route: parse each rtnexthop from cfg->fc_mp into a
 * private list of rt6_info routes, then insert them in one pass.  On a
 * mid-insert failure, routes already installed are rolled back with
 * ip6_route_del() so the FIB is left unchanged.  A single netlink
 * notification covering the whole route is sent on success (and for the
 * partially-added set before rollback, to keep userspace coherent).
 */
static int ip6_route_multipath_add(struct fib6_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
	struct nl_info *info = &cfg->fc_nlinfo;
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	__u16 nlflags;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
		nlflags |= NLM_F_APPEND;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		/* each nexthop starts from the route-level config and
		 * overrides ifindex/gateway/encap with its own attributes
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		/* rtnh_hops stores weight - 1 on the wire */
		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;

		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
					    rt, &r_cfg);
		if (err) {
			/* list did not take ownership; drop our ref */
			dst_release_immediate(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* for add and replace send one notification with all nexthops.
	 * Skip the notification in fib6_add_rt2node and send one with
	 * the full route when done
	 */
	info->skip_notify = 1;

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		rt_last = nh->rt6_info;
		err = __ip6_ins_rt(nh->rt6_info, info, extack);
		/* save reference to first route for notification */
		if (!rt_notif && !err)
			rt_notif = nh->rt6_info;

		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 *
		 * NOTE(review): this dereferences fc_nlinfo.nlh without the
		 * NULL check used when computing 'replace' above — assumes
		 * all callers arrive via rtnetlink with nlh set; confirm.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	/* success ... tell user about new route */
	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
	goto cleanup;

add_errout:
	/* send notification for routes that were added so that
	 * the delete notifications sent by ip6_route_del are
	 * coherent
	 */
	if (rt_notif)
		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);

	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg, extack);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		/* non-NULL rt6_info means the route was never inserted */
		if (nh->rt6_info)
			dst_release_immediate(&nh->rt6_info->dst);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
4399
David Ahern333c4302017-05-21 10:12:04 -06004400static int ip6_route_multipath_del(struct fib6_config *cfg,
4401 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004402{
4403 struct fib6_config r_cfg;
4404 struct rtnexthop *rtnh;
4405 int remaining;
4406 int attrlen;
4407 int err = 1, last_err = 0;
4408
4409 remaining = cfg->fc_mp_len;
4410 rtnh = (struct rtnexthop *)cfg->fc_mp;
4411
4412 /* Parse a Multipath Entry */
4413 while (rtnh_ok(rtnh, remaining)) {
4414 memcpy(&r_cfg, cfg, sizeof(*cfg));
4415 if (rtnh->rtnh_ifindex)
4416 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4417
4418 attrlen = rtnh_attrlen(rtnh);
4419 if (attrlen > 0) {
4420 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4421
4422 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4423 if (nla) {
4424 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4425 r_cfg.fc_flags |= RTF_GATEWAY;
4426 }
4427 }
David Ahern333c4302017-05-21 10:12:04 -06004428 err = ip6_route_del(&r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004429 if (err)
4430 last_err = err;
4431
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004432 rtnh = rtnh_next(rtnh, &remaining);
4433 }
4434
4435 return last_err;
4436}
4437
David Ahernc21ef3e2017-04-16 09:48:24 -07004438static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4439 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004440{
Thomas Graf86872cb2006-08-22 00:01:08 -07004441 struct fib6_config cfg;
4442 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004443
David Ahern333c4302017-05-21 10:12:04 -06004444 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004445 if (err < 0)
4446 return err;
4447
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004448 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004449 return ip6_route_multipath_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004450 else {
4451 cfg.fc_delete_all_nh = 1;
David Ahern333c4302017-05-21 10:12:04 -06004452 return ip6_route_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004453 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004454}
4455
David Ahernc21ef3e2017-04-16 09:48:24 -07004456static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4457 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004458{
Thomas Graf86872cb2006-08-22 00:01:08 -07004459 struct fib6_config cfg;
4460 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004461
David Ahern333c4302017-05-21 10:12:04 -06004462 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004463 if (err < 0)
4464 return err;
4465
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004466 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004467 return ip6_route_multipath_add(&cfg, extack);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004468 else
David Ahernacb54e32018-04-17 17:33:22 -07004469 return ip6_route_add(&cfg, GFP_KERNEL, extack);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004470}
4471
/* Upper-bound size, in bytes, of the netlink message needed to dump @rt,
 * including one rtnexthop + gateway + encap per sibling for multipath
 * routes.
 */
static size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	int nexthop_len = 0;

	if (rt->rt6i_nsiblings) {
		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
			    + NLA_ALIGN(sizeof(struct rtnexthop))
			    + nla_total_size(16) /* RTA_GATEWAY */
			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);

		/* NOTE(review): per-sibling encap space is estimated from
		 * this route's own lwtstate, which assumes every sibling's
		 * encap is no larger — confirm this holds for all callers.
		 */
		nexthop_len *= rt->rt6i_nsiblings;
	}

	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
	       + nexthop_len;
}
4501
/* Emit the nexthop attributes of @rt into @skb and accumulate its RTNH_F_*
 * state into *@flags.  With @skip_oif set, RTA_OIF is omitted (the caller
 * encodes the interface in a struct rtnexthop instead).
 *
 * Returns 0 on success or -EMSGSIZE if the skb ran out of room.
 */
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
			    unsigned int *flags, bool skip_oif)
{
	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
		*flags |= RTNH_F_DEAD;

	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
		*flags |= RTNH_F_LINKDOWN;
		/* with ignore_routes_with_linkdown, a downed link also
		 * makes the nexthop dead from the FIB's point of view
		 */
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			*flags |= RTNH_F_DEAD;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
			goto nla_put_failure;
	}

	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
		*flags |= RTNH_F_OFFLOAD;

	/* not needed for multipath encoding b/c it has a rtnexthop struct */
	if (!skip_oif && rt->fib6_nh.nh_dev &&
	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
		goto nla_put_failure;

	if (rt->fib6_nh.nh_lwtstate &&
	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
4537
/* Add one nexthop of a multipath route as a struct rtnexthop inside an
 * RTA_MULTIPATH nest; the per-nexthop attributes (gateway, encap) follow
 * the rtnexthop header, so RTA_OIF is skipped in rt6_nexthop_info().
 *
 * Returns 0 on success or -EMSGSIZE if the skb ran out of room.
 */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
	const struct net_device *dev = rt->fib6_nh.nh_dev;
	struct rtnexthop *rtnh;
	unsigned int flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	/* weight is carried on the wire as hops = weight - 1 */
	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;

	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
4565
/* Fill @skb with one RTM_NEWROUTE/RTM_DELROUTE message describing @rt.
 *
 * @dst/@dest/@src are only set when reporting a route-lookup result
 * (RTM_GETROUTE without fibmatch): @dst carries the dst_entry whose
 * metrics/expiry override the FIB route's, and @dest/@src are the looked-up
 * addresses reported with full /128 prefix lengths.  For FIB dumps all
 * three are NULL and the route's own fields are used.
 *
 * Returns 0 on success, -EMSGSIZE if the skb ran out of room (the partial
 * message is cancelled), or an error from ip6mr_get_route().
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct rt6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires = 0;
	u32 *pmetrics;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* a lookup result reports the exact destination as a /128 */
	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* multicast destinations are resolved by the multicast
		 * routing code; 0 means it fully built the message
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* lookup results carry dst metrics; FIB routes their own */
	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->rt6i_nsiblings) {
		struct rt6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->rt6i_siblings, rt6i_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	if (rt->rt6i_flags & RTF_EXPIRES) {
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4698
Patrick McHardy1b43af52006-08-10 23:11:17 -07004699int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004700{
4701 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
David Ahern1f17e2f2017-01-26 13:54:08 -08004702 struct net *net = arg->net;
4703
David Ahern421842e2018-04-17 17:33:18 -07004704 if (rt == net->ipv6.fib6_null_entry)
David Ahern1f17e2f2017-01-26 13:54:08 -08004705 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004706
Thomas Graf2d7202b2006-08-22 00:01:27 -07004707 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4708 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
David Ahernf8cfe2c2017-01-17 15:51:08 -08004709
4710 /* user wants prefix routes only */
4711 if (rtm->rtm_flags & RTM_F_PREFIX &&
4712 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4713 /* success since this is not a prefix route */
4714 return 1;
4715 }
4716 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004717
David Ahernd4ead6b2018-04-17 17:33:16 -07004718 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4719 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4720 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004721}
4722
/* RTM_GETROUTE handler: perform a route lookup for the addresses in the
 * request and unicast the result back to the requester.
 *
 * With RTA_IIF set, the lookup is done as if the packet arrived on that
 * device (input path); otherwise an output lookup is done.  With
 * RTM_F_FIB_MATCH, the FIB route that matched is reported instead of the
 * (possibly cloned) dst-level result.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	bool fibmatch;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
			  extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* input-path lookup: resolve the device under RCU */
		struct net_device *dev;
		int flags = 0;

		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);

		rcu_read_unlock();
	} else {
		fl6.flowi6_oif = oif;

		dst = ip6_route_output(net, NULL, &fl6);
	}


	rt = container_of(dst, struct rt6_info, dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	/* null entry means no route was found */
	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	/* report the originating FIB route when fibmatch was requested */
	if (fibmatch && rt->from) {
		struct rt6_info *ort = rt->from;

		dst_hold(&ort->dst);
		ip6_rt_put(rt);
		rt = ort;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* the skb takes over our reference on rt */
	skb_dst_set(skb, &rt->dst);
	if (fibmatch)
		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, 0);
	else
		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
				    iif, RTM_NEWROUTE,
				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
				    0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
4851
Roopa Prabhu37a1d362015-09-13 10:18:33 -07004852void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4853 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004854{
4855 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08004856 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004857 u32 seq;
4858 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004859
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004860 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05004861 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07004862
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004863 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05004864 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07004865 goto errout;
4866
David Ahernd4ead6b2018-04-17 17:33:16 -07004867 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4868 event, info->portid, seq, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -08004869 if (err < 0) {
4870 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4871 WARN_ON(err == -EMSGSIZE);
4872 kfree_skb(skb);
4873 goto errout;
4874 }
Eric W. Biederman15e47302012-09-07 20:12:54 +00004875 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08004876 info->nlh, gfp_any());
4877 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07004878errout:
4879 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08004880 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004881}
4882
/* netdevice notifier: bind this netns's special route entries (null, and
 * with multiple tables also prohibit/blackhole) to the loopback device
 * when it registers, and drop those idev references on unregister.
 * Only loopback events are of interest; everything else is ignored.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		/* Point every template entry of this netns at loopback and
		 * take one idev reference per entry. */
		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
		net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
4918
Linus Torvalds1da177e2005-04-16 15:20:36 -07004919/*
4920 * /proc
4921 */
4922
4923#ifdef CONFIG_PROC_FS
4924
/* /proc/net/ipv6_route file operations; ipv6_route_open (defined elsewhere
 * in this file) starts the route-dump seq_file, released per-netns. */
static const struct file_operations ipv6_route_proc_fops = {
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
4931
Linus Torvalds1da177e2005-04-16 15:20:36 -07004932static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4933{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004934 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004935 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004936 net->ipv6.rt6_stats->fib_nodes,
4937 net->ipv6.rt6_stats->fib_route_nodes,
Wei Wang81eb8442017-10-06 12:06:11 -07004938 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004939 net->ipv6.rt6_stats->fib_rt_entries,
4940 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00004941 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004942 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004943
4944 return 0;
4945}
4946
/* open() for /proc/net/rt6_stats: single-record seq_file bound to the
 * opener's netns. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
4951
/* /proc/net/rt6_stats file operations (single-shot seq_file). */
static const struct file_operations rt6_stats_seq_fops = {
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
4958#endif /* CONFIG_PROC_FS */
4959
4960#ifdef CONFIG_SYSCTL
4961
Linus Torvalds1da177e2005-04-16 15:20:36 -07004962static
Joe Perchesfe2c6332013-06-11 23:04:25 -07004963int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004964 void __user *buffer, size_t *lenp, loff_t *ppos)
4965{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004966 struct net *net;
4967 int delay;
4968 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004969 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004970
4971 net = (struct net *)ctl->extra1;
4972 delay = net->ipv6.sysctl.flush_delay;
4973 proc_dointvec(ctl, write, buffer, lenp, ppos);
Michal Kubeček2ac3ac82013-08-01 10:04:14 +02004974 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004975 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004976}
4977
/* Template for the per-netns net.ipv6.route.* sysctl table.
 * ipv6_route_sysctl_init() kmemdup()s this array and rebinds each entry's
 * .data pointer BY INDEX (table[0]..table[9]), so entry order here must
 * stay in sync with the assignments there.  The init_net pointers below
 * are placeholders that only remain in effect for init_net itself.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger; handler runs fib6 GC, see
		 * ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same backing field as gc_min_interval, but parsed in
		 * milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
5051
/* Duplicate ipv6_route_table_template for netns @net and rebind every
 * entry's .data to the netns-local field.  Indices are positional: they
 * must match the entry order in ipv6_route_table_template.  Returns the
 * new table, or NULL on allocation failure (caller handles NULL).
 * Ownership of the returned table passes to the caller.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* extra1 lets ipv6_sysctl_rtcache_flush() recover the netns */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005080#endif
5081
/* Per-netns route state setup: clone the dst_ops template, allocate the
 * special route entries (fib6 null, dst null, and with multiple tables
 * also prohibit/blackhole) and seed the sysctl defaults.  On failure the
 * goto chain below unwinds exactly the allocations made so far — the
 * label order mirrors the allocation order and must not be changed.
 * Returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
					    sizeof(*net->ipv6.fib6_null_entry),
					    GFP_KERNEL);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Defaults for the net.ipv6.route.* sysctls (see
	 * ipv6_route_table_template above). */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwind: reverse allocation order. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
5156
/* Per-netns route state teardown: free everything allocated by
 * ip6_route_net_init(). */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
5167
Thomas Grafd1896342012-06-18 12:08:33 +00005168static int __net_init ip6_route_net_init_late(struct net *net)
5169{
5170#ifdef CONFIG_PROC_FS
Gao fengd4beaa62013-02-18 01:34:54 +00005171 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
Joe Perchesd6444062018-03-23 15:54:38 -07005172 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
Thomas Grafd1896342012-06-18 12:08:33 +00005173#endif
5174 return 0;
5175}
5176
/* Late per-netns teardown: remove the proc entries created by
 * ip6_route_net_init_late(). */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
5184
/* pernet ops for the core per-netns route state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
5189
/* Allocate and install the per-netns inetpeer base used by IPv6.
 * Returns 0 or -ENOMEM; the base is freed in ipv6_inetpeer_exit(). */
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}
5200
/* Tear down the per-netns inetpeer base: detach it first, then
 * invalidate the peer tree before freeing the base itself. */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
5209
/* pernet ops for the IPv6 inetpeer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};
5214
/* pernet ops for the proc-entry setup that must run after the core
 * route state exists. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
5219
/* Netdevice notifier wiring the special route entries to loopback;
 * priority is deliberately below addrconf's so it runs after it. */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
5224
/* Bind init_net's special route entries to the loopback device.  For
 * init_net the NETDEV_REGISTER notification for loopback fires before
 * ip6_route_dev_notify() is registered, so the same fix-ups are applied
 * here by hand. */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
	init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
5241
/* Boot-time initialization of the IPv6 routing subsystem: dst cache,
 * pernet subsystems, FIB6, xfrm6, policy rules, rtnetlink handlers,
 * device notifier and the per-cpu uncached-route lists.  The goto chain
 * at the bottom unwinds in strict reverse order of the setup steps; do
 * not reorder either side independently.  Returns 0 or a negative errno.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the regular rt6_info slab cache */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	/* rtnetlink route handlers; any failure tears down all of them
	 * via rtnl_unregister_all() in the unwind path. */
	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

	/* Error unwind: reverse order of the setup above. */
out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
5333
/* Module/subsystem teardown: undo ip6_route_init() in reverse order.
 * The sequence mirrors that function's unwind path and must stay in
 * sync with it. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}