blob: 78a89e61925d6ae27937098f906145d2d8c48f5d [file] [log] [blame]
Pravin B Shelarc5441932013-03-25 14:49:35 +00001/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
Sachin Kamat27d79f32014-01-27 12:13:57 +053043#include <linux/err.h>
Pravin B Shelarc5441932013-03-25 14:49:35 +000044
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
Duan Jiong967680e2014-01-19 16:43:42 +080065static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
Pravin B Shelarc5441932013-03-25 14:49:35 +000066{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
Eric Dumazet6c7e7612014-01-16 16:41:19 -080071static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
Tom Herbert7d442fa2014-01-02 11:48:26 -080073{
74 struct dst_entry *old_dst;
75
Eric Dumazet6c7e7612014-01-16 16:41:19 -080076 if (dst) {
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080083 dst_release(old_dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080084}
85
Eric Dumazet6c7e7612014-01-16 16:41:19 -080086static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
Tom Herbert7d442fa2014-01-02 11:48:26 -080087{
Tom Herbert9a4aa9a2014-01-02 11:48:33 -080088 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -080089}
90
Eric Dumazet6c7e7612014-01-16 16:41:19 -080091static void tunnel_dst_reset(struct ip_tunnel *t)
Tom Herbert7d442fa2014-01-02 11:48:26 -080092{
93 tunnel_dst_set(t, NULL);
94}
95
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +010096void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
Tom Herbert9a4aa9a2014-01-02 11:48:33 -080097{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +0100103EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800104
Eric Dumazetb045d372014-02-03 12:52:14 -0800105static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
Tom Herbert7d442fa2014-01-02 11:48:26 -0800106{
107 struct dst_entry *dst;
108
109 rcu_read_lock();
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800110 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
Eric Dumazetb045d372014-02-03 12:52:14 -0800111 if (dst) {
112 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113 rcu_read_unlock();
114 tunnel_dst_reset(t);
115 return NULL;
116 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800117 dst_hold(dst);
Tom Herbert7d442fa2014-01-02 11:48:26 -0800118 }
Eric Dumazetb045d372014-02-03 12:52:14 -0800119 rcu_read_unlock();
120 return (struct rtable *)dst;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800121}
122
Pravin B Shelarc5441932013-03-25 14:49:35 +0000123static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124 __be16 flags, __be32 key)
125{
126 if (p->i_flags & TUNNEL_KEY) {
127 if (flags & TUNNEL_KEY)
128 return key == p->i_key;
129 else
130 /* key expected, none present */
131 return false;
132 } else
133 return !(flags & TUNNEL_KEY);
134}
135
136/* Fallback tunnel: no source, no destination, no key, no options
137
138 Tunnel hash table:
139 We require exact key match i.e. if a key is present in packet
140 it will match only tunnel with the same key; if it is not present,
141 it will match only keyless tunnel.
142
143 All keysless packets, if not matched configured keyless tunnels
144 will match fallback tunnel.
145 Given src, dst and key, find appropriate for input tunnel.
146*/
147struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148 int link, __be16 flags,
149 __be32 remote, __be32 local,
150 __be32 key)
151{
152 unsigned int hash;
153 struct ip_tunnel *t, *cand = NULL;
154 struct hlist_head *head;
155
Duan Jiong967680e2014-01-19 16:43:42 +0800156 hash = ip_tunnel_hash(key, remote);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000157 head = &itn->tunnels[hash];
158
159 hlist_for_each_entry_rcu(t, head, hash_node) {
160 if (local != t->parms.iph.saddr ||
161 remote != t->parms.iph.daddr ||
162 !(t->dev->flags & IFF_UP))
163 continue;
164
165 if (!ip_tunnel_key_match(&t->parms, flags, key))
166 continue;
167
168 if (t->parms.link == link)
169 return t;
170 else
171 cand = t;
172 }
173
174 hlist_for_each_entry_rcu(t, head, hash_node) {
175 if (remote != t->parms.iph.daddr ||
176 !(t->dev->flags & IFF_UP))
177 continue;
178
179 if (!ip_tunnel_key_match(&t->parms, flags, key))
180 continue;
181
182 if (t->parms.link == link)
183 return t;
184 else if (!cand)
185 cand = t;
186 }
187
Duan Jiong967680e2014-01-19 16:43:42 +0800188 hash = ip_tunnel_hash(key, 0);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000189 head = &itn->tunnels[hash];
190
191 hlist_for_each_entry_rcu(t, head, hash_node) {
192 if ((local != t->parms.iph.saddr &&
193 (local != t->parms.iph.daddr ||
194 !ipv4_is_multicast(local))) ||
195 !(t->dev->flags & IFF_UP))
196 continue;
197
198 if (!ip_tunnel_key_match(&t->parms, flags, key))
199 continue;
200
201 if (t->parms.link == link)
202 return t;
203 else if (!cand)
204 cand = t;
205 }
206
207 if (flags & TUNNEL_NO_KEY)
208 goto skip_key_lookup;
209
210 hlist_for_each_entry_rcu(t, head, hash_node) {
211 if (t->parms.i_key != key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->parms.link == link)
216 return t;
217 else if (!cand)
218 cand = t;
219 }
220
221skip_key_lookup:
222 if (cand)
223 return cand;
224
225 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226 return netdev_priv(itn->fb_tunnel_dev);
227
228
229 return NULL;
230}
231EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234 struct ip_tunnel_parm *parms)
235{
236 unsigned int h;
237 __be32 remote;
238
239 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
240 remote = parms->iph.daddr;
241 else
242 remote = 0;
243
Duan Jiong967680e2014-01-19 16:43:42 +0800244 h = ip_tunnel_hash(parms->i_key, remote);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000245 return &itn->tunnels[h];
246}
247
248static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249{
250 struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252 hlist_add_head_rcu(&t->hash_node, head);
253}
254
255static void ip_tunnel_del(struct ip_tunnel *t)
256{
257 hlist_del_init_rcu(&t->hash_node);
258}
259
260static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
261 struct ip_tunnel_parm *parms,
262 int type)
263{
264 __be32 remote = parms->iph.daddr;
265 __be32 local = parms->iph.saddr;
266 __be32 key = parms->i_key;
267 int link = parms->link;
268 struct ip_tunnel *t = NULL;
269 struct hlist_head *head = ip_bucket(itn, parms);
270
271 hlist_for_each_entry_rcu(t, head, hash_node) {
272 if (local == t->parms.iph.saddr &&
273 remote == t->parms.iph.daddr &&
274 key == t->parms.i_key &&
275 link == t->parms.link &&
276 type == t->dev->type)
277 break;
278 }
279 return t;
280}
281
282static struct net_device *__ip_tunnel_create(struct net *net,
283 const struct rtnl_link_ops *ops,
284 struct ip_tunnel_parm *parms)
285{
286 int err;
287 struct ip_tunnel *tunnel;
288 struct net_device *dev;
289 char name[IFNAMSIZ];
290
291 if (parms->name[0])
292 strlcpy(name, parms->name, IFNAMSIZ);
293 else {
Pravin B Shelar54a5d382013-03-28 08:21:46 +0000294 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000295 err = -E2BIG;
296 goto failed;
297 }
298 strlcpy(name, ops->kind, IFNAMSIZ);
299 strncat(name, "%d", 2);
300 }
301
302 ASSERT_RTNL();
303 dev = alloc_netdev(ops->priv_size, name, ops->setup);
304 if (!dev) {
305 err = -ENOMEM;
306 goto failed;
307 }
308 dev_net_set(dev, net);
309
310 dev->rtnl_link_ops = ops;
311
312 tunnel = netdev_priv(dev);
313 tunnel->parms = *parms;
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200314 tunnel->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000315
316 err = register_netdevice(dev);
317 if (err)
318 goto failed_free;
319
320 return dev;
321
322failed_free:
323 free_netdev(dev);
324failed:
325 return ERR_PTR(err);
326}
327
Tom Herbert7d442fa2014-01-02 11:48:26 -0800328static inline void init_tunnel_flow(struct flowi4 *fl4,
329 int proto,
330 __be32 daddr, __be32 saddr,
331 __be32 key, __u8 tos, int oif)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000332{
333 memset(fl4, 0, sizeof(*fl4));
334 fl4->flowi4_oif = oif;
335 fl4->daddr = daddr;
336 fl4->saddr = saddr;
337 fl4->flowi4_tos = tos;
338 fl4->flowi4_proto = proto;
339 fl4->fl4_gre_key = key;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000340}
341
342static int ip_tunnel_bind_dev(struct net_device *dev)
343{
344 struct net_device *tdev = NULL;
345 struct ip_tunnel *tunnel = netdev_priv(dev);
346 const struct iphdr *iph;
347 int hlen = LL_MAX_HEADER;
348 int mtu = ETH_DATA_LEN;
349 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
350
351 iph = &tunnel->parms.iph;
352
353 /* Guess output device to choose reasonable mtu and needed_headroom */
354 if (iph->daddr) {
355 struct flowi4 fl4;
356 struct rtable *rt;
357
Tom Herbert7d442fa2014-01-02 11:48:26 -0800358 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
359 iph->saddr, tunnel->parms.o_key,
360 RT_TOS(iph->tos), tunnel->parms.link);
361 rt = ip_route_output_key(tunnel->net, &fl4);
362
Pravin B Shelarc5441932013-03-25 14:49:35 +0000363 if (!IS_ERR(rt)) {
364 tdev = rt->dst.dev;
Eric Dumazet6c7e7612014-01-16 16:41:19 -0800365 tunnel_dst_set(tunnel, &rt->dst);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000366 ip_rt_put(rt);
367 }
368 if (dev->type != ARPHRD_ETHER)
369 dev->flags |= IFF_POINTOPOINT;
370 }
371
372 if (!tdev && tunnel->parms.link)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200373 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000374
375 if (tdev) {
376 hlen = tdev->hard_header_len + tdev->needed_headroom;
377 mtu = tdev->mtu;
378 }
379 dev->iflink = tunnel->parms.link;
380
381 dev->needed_headroom = t_hlen + hlen;
382 mtu -= (dev->hard_header_len + t_hlen);
383
384 if (mtu < 68)
385 mtu = 68;
386
387 return mtu;
388}
389
390static struct ip_tunnel *ip_tunnel_create(struct net *net,
391 struct ip_tunnel_net *itn,
392 struct ip_tunnel_parm *parms)
393{
394 struct ip_tunnel *nt, *fbt;
395 struct net_device *dev;
396
397 BUG_ON(!itn->fb_tunnel_dev);
398 fbt = netdev_priv(itn->fb_tunnel_dev);
399 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400 if (IS_ERR(dev))
401 return NULL;
402
403 dev->mtu = ip_tunnel_bind_dev(dev);
404
405 nt = netdev_priv(dev);
406 ip_tunnel_add(itn, nt);
407 return nt;
408}
409
410int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
411 const struct tnl_ptk_info *tpi, bool log_ecn_error)
412{
Li RongQing8f849852014-01-04 13:57:59 +0800413 struct pcpu_sw_netstats *tstats;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000414 const struct iphdr *iph = ip_hdr(skb);
415 int err;
416
Pravin B Shelarc5441932013-03-25 14:49:35 +0000417#ifdef CONFIG_NET_IPGRE_BROADCAST
418 if (ipv4_is_multicast(iph->daddr)) {
419 /* Looped back packet, drop it! */
420 if (rt_is_output_route(skb_rtable(skb)))
421 goto drop;
422 tunnel->dev->stats.multicast++;
423 skb->pkt_type = PACKET_BROADCAST;
424 }
425#endif
426
427 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
428 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
429 tunnel->dev->stats.rx_crc_errors++;
430 tunnel->dev->stats.rx_errors++;
431 goto drop;
432 }
433
434 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
435 if (!(tpi->flags&TUNNEL_SEQ) ||
436 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
437 tunnel->dev->stats.rx_fifo_errors++;
438 tunnel->dev->stats.rx_errors++;
439 goto drop;
440 }
441 tunnel->i_seqno = ntohl(tpi->seq) + 1;
442 }
443
Pravin B Shelarc5441932013-03-25 14:49:35 +0000444 err = IP_ECN_decapsulate(iph, skb);
445 if (unlikely(err)) {
446 if (log_ecn_error)
447 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
448 &iph->saddr, iph->tos);
449 if (err > 1) {
450 ++tunnel->dev->stats.rx_frame_errors;
451 ++tunnel->dev->stats.rx_errors;
452 goto drop;
453 }
454 }
455
456 tstats = this_cpu_ptr(tunnel->dev->tstats);
457 u64_stats_update_begin(&tstats->syncp);
458 tstats->rx_packets++;
459 tstats->rx_bytes += skb->len;
460 u64_stats_update_end(&tstats->syncp);
461
Alexei Starovoitov81b9eab2013-11-12 14:39:13 -0800462 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
463
Pravin B Shelar3d7b46c2013-06-17 17:50:02 -0700464 if (tunnel->dev->type == ARPHRD_ETHER) {
465 skb->protocol = eth_type_trans(skb, tunnel->dev);
466 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
467 } else {
468 skb->dev = tunnel->dev;
469 }
Nicolas Dichtel64261f22013-08-13 17:51:09 +0200470
Pravin B Shelarc5441932013-03-25 14:49:35 +0000471 gro_cells_receive(&tunnel->gro_cells, skb);
472 return 0;
473
474drop:
475 kfree_skb(skb);
476 return 0;
477}
478EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
479
Pravin B Shelar23a36472013-07-02 10:57:33 -0700480static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
481 struct rtable *rt, __be16 df)
482{
483 struct ip_tunnel *tunnel = netdev_priv(dev);
Alexander Duyck8c91e162013-07-11 13:12:22 -0700484 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
Pravin B Shelar23a36472013-07-02 10:57:33 -0700485 int mtu;
486
487 if (df)
488 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
489 - sizeof(struct iphdr) - tunnel->hlen;
490 else
491 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
492
493 if (skb_dst(skb))
494 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
495
496 if (skb->protocol == htons(ETH_P_IP)) {
497 if (!skb_is_gso(skb) &&
498 (df & htons(IP_DF)) && mtu < pkt_size) {
499 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
500 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
501 return -E2BIG;
502 }
503 }
504#if IS_ENABLED(CONFIG_IPV6)
505 else if (skb->protocol == htons(ETH_P_IPV6)) {
506 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
507
508 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
509 mtu >= IPV6_MIN_MTU) {
510 if ((tunnel->parms.iph.daddr &&
511 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
512 rt6->rt6i_dst.plen == 128) {
513 rt6->rt6i_flags |= RTF_MODIFIED;
514 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
515 }
516 }
517
518 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
519 mtu < pkt_size) {
520 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
521 return -E2BIG;
522 }
523 }
524#endif
525 return 0;
526}
527
Pravin B Shelarc5441932013-03-25 14:49:35 +0000528void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
Nicolas Dichtelbf3d6a82013-05-27 23:48:15 +0000529 const struct iphdr *tnl_params, const u8 protocol)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000530{
531 struct ip_tunnel *tunnel = netdev_priv(dev);
532 const struct iphdr *inner_iph;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000533 struct flowi4 fl4;
534 u8 tos, ttl;
535 __be16 df;
Eric Dumazetb045d372014-02-03 12:52:14 -0800536 struct rtable *rt; /* Route to the other host */
Pravin B Shelarc5441932013-03-25 14:49:35 +0000537 unsigned int max_headroom; /* The extra header space needed */
538 __be32 dst;
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700539 int err;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800540 bool connected = true;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000541
542 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
543
544 dst = tnl_params->daddr;
545 if (dst == 0) {
546 /* NBMA tunnel */
547
548 if (skb_dst(skb) == NULL) {
549 dev->stats.tx_fifo_errors++;
550 goto tx_error;
551 }
552
553 if (skb->protocol == htons(ETH_P_IP)) {
554 rt = skb_rtable(skb);
555 dst = rt_nexthop(rt, inner_iph->daddr);
556 }
557#if IS_ENABLED(CONFIG_IPV6)
558 else if (skb->protocol == htons(ETH_P_IPV6)) {
559 const struct in6_addr *addr6;
560 struct neighbour *neigh;
561 bool do_tx_error_icmp;
562 int addr_type;
563
564 neigh = dst_neigh_lookup(skb_dst(skb),
565 &ipv6_hdr(skb)->daddr);
566 if (neigh == NULL)
567 goto tx_error;
568
569 addr6 = (const struct in6_addr *)&neigh->primary_key;
570 addr_type = ipv6_addr_type(addr6);
571
572 if (addr_type == IPV6_ADDR_ANY) {
573 addr6 = &ipv6_hdr(skb)->daddr;
574 addr_type = ipv6_addr_type(addr6);
575 }
576
577 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
578 do_tx_error_icmp = true;
579 else {
580 do_tx_error_icmp = false;
581 dst = addr6->s6_addr32[3];
582 }
583 neigh_release(neigh);
584 if (do_tx_error_icmp)
585 goto tx_error_icmp;
586 }
587#endif
588 else
589 goto tx_error;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800590
591 connected = false;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000592 }
593
594 tos = tnl_params->tos;
595 if (tos & 0x1) {
596 tos &= ~0x1;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800597 if (skb->protocol == htons(ETH_P_IP)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000598 tos = inner_iph->tos;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800599 connected = false;
600 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000601 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
Tom Herbert7d442fa2014-01-02 11:48:26 -0800602 connected = false;
603 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000604 }
605
Tom Herbert7d442fa2014-01-02 11:48:26 -0800606 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
607 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
608
Eric Dumazetb045d372014-02-03 12:52:14 -0800609 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800610
611 if (!rt) {
612 rt = ip_route_output_key(tunnel->net, &fl4);
613
614 if (IS_ERR(rt)) {
615 dev->stats.tx_carrier_errors++;
616 goto tx_error;
617 }
618 if (connected)
Eric Dumazet6c7e7612014-01-16 16:41:19 -0800619 tunnel_dst_set(tunnel, &rt->dst);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000620 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800621
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700622 if (rt->dst.dev == dev) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000623 ip_rt_put(rt);
624 dev->stats.collisions++;
625 goto tx_error;
626 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000627
Pravin B Shelar23a36472013-07-02 10:57:33 -0700628 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
629 ip_rt_put(rt);
630 goto tx_error;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000631 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000632
633 if (tunnel->err_count > 0) {
634 if (time_before(jiffies,
635 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
636 tunnel->err_count--;
637
Duan Jiong11c21a32014-01-23 14:00:25 +0800638 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
Pravin B Shelarc5441932013-03-25 14:49:35 +0000639 dst_link_failure(skb);
640 } else
641 tunnel->err_count = 0;
642 }
643
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700644 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000645 ttl = tnl_params->ttl;
646 if (ttl == 0) {
647 if (skb->protocol == htons(ETH_P_IP))
648 ttl = inner_iph->ttl;
649#if IS_ENABLED(CONFIG_IPV6)
650 else if (skb->protocol == htons(ETH_P_IPV6))
651 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
652#endif
653 else
654 ttl = ip4_dst_hoplimit(&rt->dst);
655 }
656
Pravin B Shelar23a36472013-07-02 10:57:33 -0700657 df = tnl_params->frag_off;
658 if (skb->protocol == htons(ETH_P_IP))
659 df |= (inner_iph->frag_off&htons(IP_DF));
660
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700661 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
662 + rt->dst.header_len;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200663 if (max_headroom > dev->needed_headroom)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000664 dev->needed_headroom = max_headroom;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200665
666 if (skb_cow_head(skb, dev->needed_headroom)) {
667 dev->stats.tx_dropped++;
Eric Dumazet3acfa1e2014-01-18 18:27:49 -0800668 kfree_skb(skb);
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200669 return;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000670 }
671
Nicolas Dichtel8b7ed2d2013-09-02 15:34:54 +0200672 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700673 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700674 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000675
Pravin B Shelarc5441932013-03-25 14:49:35 +0000676 return;
677
678#if IS_ENABLED(CONFIG_IPV6)
679tx_error_icmp:
680 dst_link_failure(skb);
681#endif
682tx_error:
683 dev->stats.tx_errors++;
Eric Dumazet3acfa1e2014-01-18 18:27:49 -0800684 kfree_skb(skb);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000685}
686EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
687
688static void ip_tunnel_update(struct ip_tunnel_net *itn,
689 struct ip_tunnel *t,
690 struct net_device *dev,
691 struct ip_tunnel_parm *p,
692 bool set_mtu)
693{
694 ip_tunnel_del(t);
695 t->parms.iph.saddr = p->iph.saddr;
696 t->parms.iph.daddr = p->iph.daddr;
697 t->parms.i_key = p->i_key;
698 t->parms.o_key = p->o_key;
699 if (dev->type != ARPHRD_ETHER) {
700 memcpy(dev->dev_addr, &p->iph.saddr, 4);
701 memcpy(dev->broadcast, &p->iph.daddr, 4);
702 }
703 ip_tunnel_add(itn, t);
704
705 t->parms.iph.ttl = p->iph.ttl;
706 t->parms.iph.tos = p->iph.tos;
707 t->parms.iph.frag_off = p->iph.frag_off;
708
709 if (t->parms.link != p->link) {
710 int mtu;
711
712 t->parms.link = p->link;
713 mtu = ip_tunnel_bind_dev(dev);
714 if (set_mtu)
715 dev->mtu = mtu;
716 }
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +0100717 ip_tunnel_dst_reset_all(t);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000718 netdev_state_change(dev);
719}
720
721int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
722{
723 int err = 0;
724 struct ip_tunnel *t;
725 struct net *net = dev_net(dev);
726 struct ip_tunnel *tunnel = netdev_priv(dev);
727 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
728
729 BUG_ON(!itn->fb_tunnel_dev);
730 switch (cmd) {
731 case SIOCGETTUNNEL:
732 t = NULL;
733 if (dev == itn->fb_tunnel_dev)
734 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
735 if (t == NULL)
736 t = netdev_priv(dev);
737 memcpy(p, &t->parms, sizeof(*p));
738 break;
739
740 case SIOCADDTUNNEL:
741 case SIOCCHGTUNNEL:
742 err = -EPERM;
743 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
744 goto done;
745 if (p->iph.ttl)
746 p->iph.frag_off |= htons(IP_DF);
747 if (!(p->i_flags&TUNNEL_KEY))
748 p->i_key = 0;
749 if (!(p->o_flags&TUNNEL_KEY))
750 p->o_key = 0;
751
752 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
753
754 if (!t && (cmd == SIOCADDTUNNEL))
755 t = ip_tunnel_create(net, itn, p);
756
757 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
758 if (t != NULL) {
759 if (t->dev != dev) {
760 err = -EEXIST;
761 break;
762 }
763 } else {
764 unsigned int nflags = 0;
765
766 if (ipv4_is_multicast(p->iph.daddr))
767 nflags = IFF_BROADCAST;
768 else if (p->iph.daddr)
769 nflags = IFF_POINTOPOINT;
770
771 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
772 err = -EINVAL;
773 break;
774 }
775
776 t = netdev_priv(dev);
777 }
778 }
779
780 if (t) {
781 err = 0;
782 ip_tunnel_update(itn, t, dev, p, true);
783 } else
784 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
785 break;
786
787 case SIOCDELTUNNEL:
788 err = -EPERM;
789 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
790 goto done;
791
792 if (dev == itn->fb_tunnel_dev) {
793 err = -ENOENT;
794 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
795 if (t == NULL)
796 goto done;
797 err = -EPERM;
798 if (t == netdev_priv(itn->fb_tunnel_dev))
799 goto done;
800 dev = t->dev;
801 }
802 unregister_netdevice(dev);
803 err = 0;
804 break;
805
806 default:
807 err = -EINVAL;
808 }
809
810done:
811 return err;
812}
813EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
814
815int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
816{
817 struct ip_tunnel *tunnel = netdev_priv(dev);
818 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
819
820 if (new_mtu < 68 ||
821 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
822 return -EINVAL;
823 dev->mtu = new_mtu;
824 return 0;
825}
826EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
827
828static void ip_tunnel_dev_free(struct net_device *dev)
829{
830 struct ip_tunnel *tunnel = netdev_priv(dev);
831
832 gro_cells_destroy(&tunnel->gro_cells);
Tom Herbert9a4aa9a2014-01-02 11:48:33 -0800833 free_percpu(tunnel->dst_cache);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000834 free_percpu(dev->tstats);
835 free_netdev(dev);
836}
837
838void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
839{
Pravin B Shelarc5441932013-03-25 14:49:35 +0000840 struct ip_tunnel *tunnel = netdev_priv(dev);
841 struct ip_tunnel_net *itn;
842
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200843 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000844
845 if (itn->fb_tunnel_dev != dev) {
846 ip_tunnel_del(netdev_priv(dev));
847 unregister_netdevice_queue(dev, head);
848 }
849}
850EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
851
Eric Dumazetd3b6f612013-06-07 13:26:05 -0700852int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
Pravin B Shelarc5441932013-03-25 14:49:35 +0000853 struct rtnl_link_ops *ops, char *devname)
854{
855 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
856 struct ip_tunnel_parm parms;
stephen hemminger6261d982013-08-05 22:51:37 -0700857 unsigned int i;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000858
stephen hemminger6261d982013-08-05 22:51:37 -0700859 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
860 INIT_HLIST_HEAD(&itn->tunnels[i]);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000861
862 if (!ops) {
863 itn->fb_tunnel_dev = NULL;
864 return 0;
865 }
stephen hemminger6261d982013-08-05 22:51:37 -0700866
Pravin B Shelarc5441932013-03-25 14:49:35 +0000867 memset(&parms, 0, sizeof(parms));
868 if (devname)
869 strlcpy(parms.name, devname, IFNAMSIZ);
870
871 rtnl_lock();
872 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
Dan Carpenterea857f22013-08-19 10:05:10 +0300873 /* FB netdevice is special: we have one, and only one per netns.
874 * Allowing to move it to another netns is clearly unsafe.
875 */
Steffen Klassert67013282013-10-01 11:34:48 +0200876 if (!IS_ERR(itn->fb_tunnel_dev)) {
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300877 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
Steffen Klassert67013282013-10-01 11:34:48 +0200878 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
879 }
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300880 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000881
Sachin Kamat27d79f32014-01-27 12:13:57 +0530882 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000883}
884EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200886static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
887 struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000888{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200889 struct net *net = dev_net(itn->fb_tunnel_dev);
890 struct net_device *dev, *aux;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000891 int h;
892
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200893 for_each_netdev_safe(net, dev, aux)
894 if (dev->rtnl_link_ops == ops)
895 unregister_netdevice_queue(dev, head);
896
Pravin B Shelarc5441932013-03-25 14:49:35 +0000897 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
898 struct ip_tunnel *t;
899 struct hlist_node *n;
900 struct hlist_head *thead = &itn->tunnels[h];
901
902 hlist_for_each_entry_safe(t, n, thead, hash_node)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200903 /* If dev is in the same netns, it has already
904 * been added to the list by the previous loop.
905 */
906 if (!net_eq(dev_net(t->dev), net))
907 unregister_netdevice_queue(t->dev, head);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000908 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000909}
910
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200911void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000912{
913 LIST_HEAD(list);
914
915 rtnl_lock();
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200916 ip_tunnel_destroy(itn, &list, ops);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000917 unregister_netdevice_many(&list);
918 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000919}
920EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
921
922int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
923 struct ip_tunnel_parm *p)
924{
925 struct ip_tunnel *nt;
926 struct net *net = dev_net(dev);
927 struct ip_tunnel_net *itn;
928 int mtu;
929 int err;
930
931 nt = netdev_priv(dev);
932 itn = net_generic(net, nt->ip_tnl_net_id);
933
934 if (ip_tunnel_find(itn, p, dev->type))
935 return -EEXIST;
936
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200937 nt->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000938 nt->parms = *p;
939 err = register_netdevice(dev);
940 if (err)
941 goto out;
942
943 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
944 eth_hw_addr_random(dev);
945
946 mtu = ip_tunnel_bind_dev(dev);
947 if (!tb[IFLA_MTU])
948 dev->mtu = mtu;
949
950 ip_tunnel_add(itn, nt);
951
952out:
953 return err;
954}
955EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
956
957int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
958 struct ip_tunnel_parm *p)
959{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200960 struct ip_tunnel *t;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000961 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200962 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000963 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
964
965 if (dev == itn->fb_tunnel_dev)
966 return -EINVAL;
967
Pravin B Shelarc5441932013-03-25 14:49:35 +0000968 t = ip_tunnel_find(itn, p, dev->type);
969
970 if (t) {
971 if (t->dev != dev)
972 return -EEXIST;
973 } else {
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200974 t = tunnel;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000975
976 if (dev->type != ARPHRD_ETHER) {
977 unsigned int nflags = 0;
978
979 if (ipv4_is_multicast(p->iph.daddr))
980 nflags = IFF_BROADCAST;
981 else if (p->iph.daddr)
982 nflags = IFF_POINTOPOINT;
983
984 if ((dev->flags ^ nflags) &
985 (IFF_POINTOPOINT | IFF_BROADCAST))
986 return -EINVAL;
987 }
988 }
989
990 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
991 return 0;
992}
993EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
994
995int ip_tunnel_init(struct net_device *dev)
996{
997 struct ip_tunnel *tunnel = netdev_priv(dev);
998 struct iphdr *iph = &tunnel->parms.iph;
John Stultz827da442013-10-07 15:51:58 -0700999 int i, err;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001000
1001 dev->destructor = ip_tunnel_dev_free;
Li RongQing8f849852014-01-04 13:57:59 +08001002 dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001003 if (!dev->tstats)
1004 return -ENOMEM;
1005
John Stultz827da442013-10-07 15:51:58 -07001006 for_each_possible_cpu(i) {
Li RongQing8f849852014-01-04 13:57:59 +08001007 struct pcpu_sw_netstats *ipt_stats;
John Stultz827da442013-10-07 15:51:58 -07001008 ipt_stats = per_cpu_ptr(dev->tstats, i);
1009 u64_stats_init(&ipt_stats->syncp);
1010 }
1011
Tom Herbert9a4aa9a2014-01-02 11:48:33 -08001012 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1013 if (!tunnel->dst_cache) {
1014 free_percpu(dev->tstats);
1015 return -ENOMEM;
1016 }
1017
Pravin B Shelarc5441932013-03-25 14:49:35 +00001018 err = gro_cells_init(&tunnel->gro_cells, dev);
1019 if (err) {
Tom Herbert9a4aa9a2014-01-02 11:48:33 -08001020 free_percpu(tunnel->dst_cache);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001021 free_percpu(dev->tstats);
1022 return err;
1023 }
1024
1025 tunnel->dev = dev;
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001026 tunnel->net = dev_net(dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001027 strcpy(tunnel->parms.name, dev->name);
1028 iph->version = 4;
1029 iph->ihl = 5;
1030
1031 return 0;
1032}
1033EXPORT_SYMBOL_GPL(ip_tunnel_init);
1034
1035void ip_tunnel_uninit(struct net_device *dev)
1036{
Pravin B Shelarc5441932013-03-25 14:49:35 +00001037 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001038 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001039 struct ip_tunnel_net *itn;
1040
1041 itn = net_generic(net, tunnel->ip_tnl_net_id);
1042 /* fb_tunnel_dev will be unregisted in net-exit call. */
1043 if (itn->fb_tunnel_dev != dev)
1044 ip_tunnel_del(netdev_priv(dev));
Tom Herbert7d442fa2014-01-02 11:48:26 -08001045
Nicolas Dichtelcf71d2bc2014-02-20 10:19:31 +01001046 ip_tunnel_dst_reset_all(tunnel);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001047}
1048EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1049
1050/* Do least required initialization, rest of init is done in tunnel_init call */
1051void ip_tunnel_setup(struct net_device *dev, int net_id)
1052{
1053 struct ip_tunnel *tunnel = netdev_priv(dev);
1054 tunnel->ip_tnl_net_id = net_id;
1055}
1056EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1057
1058MODULE_LICENSE("GPL");