/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

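/* Buckets are chosen by hashing (key ^ remote) down to IP_TNL_HASH_BITS
 * bits.  ip_bucket() below must derive the bucket the same way so that
 * insertion and lookup agree.
 */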
static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
				   __be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

/* Often-modified stats are per cpu, others are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

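		/* Fetch a consistent 64-bit snapshot: retry if a writer
		 * updated the counters while we were reading them.
		 */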
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options
 *
 * Tunnel hash table:
 * We require an exact key match, i.e. if a key is present in the packet
 * it will match only a tunnel with the same key; if it is not present,
 * it will match only a keyless tunnel.
 *
 * All keyless packets, if not matched against a configured keyless
 * tunnel, will match the fallback tunnel.
 *
 * Given src, dst and key, find the appropriate tunnel for the incoming
 * packet.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

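	/* Pass 1: both local and remote address match exactly. */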
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

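	/* Pass 2: match on the remote address only, regardless of the
	 * tunnel's configured local address.
	 */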
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

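	/* Passes 3 and 4: rehash with a wildcard (zero) remote to reach
	 * tunnels that have no fixed destination.
	 */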
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

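	/* Pass 4: keyed lookup, ignoring addresses entirely. */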
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
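	/* A candidate that matched on a different link beats the
	 * catch-all fallback device.
	 */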
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;

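	/* Multicast destinations hash with a zero remote so they land in
	 * the bucket ip_tunnel_lookup() probes on its wildcard passes.
	 */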
	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	h = ip_tunnel_hash(itn, parms->i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) + 3 >= IFNAMSIZ) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
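		/* The "%d" is expanded to the first free unit number
		 * when the device is registered.
		 */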
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline struct rtable *ip_route_output_tunnel(struct net *net,
						    struct flowi4 *fl4,
						    int proto,
						    __be32 daddr, __be32 saddr,
						    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	return ip_route_output_key(net, fl4);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose a reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

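	/* Never advertise less than the IPv4 minimum MTU of 68 bytes. */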
	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt, *fbt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	fbt = netdev_priv(itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return NULL;

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	skb->mac_header = skb->network_header;
	__pskb_pull(skb, tunnel->hlen);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

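	/* Both ends must agree on TUNNEL_CSUM; a flag mismatch in either
	 * direction is accounted as a CRC error and dropped.
	 */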
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

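	/* With TUNNEL_SEQ, accept only packets whose sequence number
	 * advances the expected window; stale packets are dropped.
	 */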
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
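	/* A non-zero return flags an outer/inner ECN mismatch; values
	 * above one mean the packet must be dropped.
	 */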
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int mtu;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tnl_params->tos;
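	/* Bit 0 of the configured TOS means: inherit TOS/DSCP from the
	 * inner packet.
	 */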
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;

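	/* With DF set, the usable MTU is the route MTU minus our own
	 * encapsulation overhead; otherwise fall back to the inner dst
	 * or device MTU.
	 */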
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr);
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (inner_iph->frag_off&htons(IP_DF));

		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(inner_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < skb->len) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

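	/* err_count is typically raised when ICMP errors come back for
	 * this tunnel; while it drains, report link failure upstream
	 * instead of transmitting.
	 */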
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
					       + rt->dst.header_len;
	if (max_headroom > dev->needed_headroom) {
		dev->needed_headroom = max_headroom;
		if (skb_cow_head(skb, dev->needed_headroom)) {
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return;
		}
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = tnl_params->protocol;
	iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr = fl4.daddr;
	iph->saddr = fl4.saddr;
	iph->ttl = ttl;
	tunnel_ip_select_ident(skb, inner_iph, &rt->dst);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

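	/* 68 is the IPv4 minimum MTU; the upper bound keeps the outer
	 * datagram within the maximum IPv4 packet size once the link
	 * and tunnel headers are added.
	 */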
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;

	itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
	if (!itn->tunnels)
		return -ENOMEM;

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}
	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	rtnl_unlock();
	if (IS_ERR(itn->fb_tunnel_dev)) {
		kfree(itn->tunnels);
		return PTR_ERR(itn->fb_tunnel_dev);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
{
	int h;

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			unregister_netdevice_queue(t->dev, head);
	}
	if (itn->fb_tunnel_dev)
		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}

void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the minimum required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");