blob: 7240f8e2dd4511dde4de0bd08290bb718eab140f [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet6d0722a2010-09-29 23:35:10 -070069 and silently drop packet when it expires. It is a good
stephen hemmingerbff52852012-02-24 08:08:20 +000070 solution, but it supposes maintaining new variable in ALL
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 skb, even if no tunneling is used.
72
Eric Dumazet6d0722a2010-09-29 23:35:10 -070073 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
       would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
stephen hemmingerbff52852012-02-24 08:08:20 +0000103 rapidly degrades to value <68, where looping stops.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000123static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
Herbert Xuc19e6542008-10-09 11:59:55 -0700127static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700130static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700134#define HASH_SIZE 16
135
Eric Dumazetf99189b2009-11-17 10:42:49 +0000136static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700139
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700140 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700141};
142
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143/* Tunnel hash table */
144
145/*
146 4 hash tables:
147
148 3: (remote,local)
149 2: (remote,*)
150 1: (*,local)
151 0: (*,*)
152
153 We require exact key match i.e. if a key is present in packet
154 it will match only tunnel with the same key; if it is not present,
155 it will match only keyless tunnel.
156
   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
159 */
160
/*
 * Fold a 32-bit big-endian value (address or key) into a bucket index.
 * The argument is fully parenthesized so that arbitrary expressions hash
 * correctly, and the mask is derived from HASH_SIZE (which must be a power
 * of two) instead of being hard-coded.  Note that the argument is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
/* Readable aliases for the four hash tables, named after what each one keys on. */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain under RCU.  The cursor is a variable named 't' that
 * the caller must have declared as 'struct ip_tunnel *'.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
Eric Dumazete985aad2010-09-27 03:57:11 +0000174/* often modified stats are per cpu, other are shared (netdev->stats) */
175struct pcpu_tstats {
stephen hemminger87b6d212012-04-12 06:31:16 +0000176 u64 rx_packets;
177 u64 rx_bytes;
178 u64 tx_packets;
179 u64 tx_bytes;
180 struct u64_stats_sync syncp;
181};
Eric Dumazete985aad2010-09-27 03:57:11 +0000182
stephen hemminger87b6d212012-04-12 06:31:16 +0000183static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
184 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000185{
Eric Dumazete985aad2010-09-27 03:57:11 +0000186 int i;
187
188 for_each_possible_cpu(i) {
189 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000190 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
191 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000192
stephen hemminger87b6d212012-04-12 06:31:16 +0000193 do {
194 start = u64_stats_fetch_begin_bh(&tstats->syncp);
195 rx_packets = tstats->rx_packets;
196 tx_packets = tstats->tx_packets;
197 rx_bytes = tstats->rx_bytes;
198 tx_bytes = tstats->tx_bytes;
199 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
200
201 tot->rx_packets += rx_packets;
202 tot->tx_packets += tx_packets;
203 tot->rx_bytes += rx_bytes;
204 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000205 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000206
207 tot->multicast = dev->stats.multicast;
208 tot->rx_crc_errors = dev->stats.rx_crc_errors;
209 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
210 tot->rx_length_errors = dev->stats.rx_length_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000211 tot->rx_frame_errors = dev->stats.rx_frame_errors;
stephen hemminger87b6d212012-04-12 06:31:16 +0000212 tot->rx_errors = dev->stats.rx_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000213
stephen hemminger87b6d212012-04-12 06:31:16 +0000214 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
215 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
216 tot->tx_dropped = dev->stats.tx_dropped;
217 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
218 tot->tx_errors = dev->stats.tx_errors;
219
220 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000221}
222
stephen hemmingerd2083282012-09-24 18:12:23 +0000223/* Does key in tunnel parameters match packet */
224static bool ipgre_key_match(const struct ip_tunnel_parm *p,
stephen hemminger9fbef052012-10-01 05:21:14 +0000225 __be16 flags, __be32 key)
stephen hemmingerd2083282012-09-24 18:12:23 +0000226{
227 if (p->i_flags & GRE_KEY) {
228 if (flags & GRE_KEY)
229 return key == p->i_key;
230 else
231 return false; /* key expected, none present */
232 } else
233 return !(flags & GRE_KEY);
234}
235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236/* Given src, dst and key, find appropriate for input tunnel. */
237
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000238static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
239 __be32 remote, __be32 local,
stephen hemminger9fbef052012-10-01 05:21:14 +0000240 __be16 flags, __be32 key,
stephen hemmingerd2083282012-09-24 18:12:23 +0000241 __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242{
Timo Teras749c10f2009-01-19 17:22:12 -0800243 struct net *net = dev_net(dev);
244 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000245 unsigned int h0 = HASH(remote);
246 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800247 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700248 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700249 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
250 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800251 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000253 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800254 if (local != t->parms.iph.saddr ||
255 remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800256 !(t->dev->flags & IFF_UP))
257 continue;
258
stephen hemmingerd2083282012-09-24 18:12:23 +0000259 if (!ipgre_key_match(&t->parms, flags, key))
260 continue;
261
Timo Teras749c10f2009-01-19 17:22:12 -0800262 if (t->dev->type != ARPHRD_IPGRE &&
263 t->dev->type != dev_type)
264 continue;
265
Timo Terasafcf1242009-01-26 20:56:10 -0800266 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800267 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800268 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800269 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800270 score |= 2;
271 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800272 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800273
274 if (score < cand_score) {
275 cand = t;
276 cand_score = score;
277 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 }
Herbert Xue1a80002008-10-09 12:00:17 -0700279
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000280 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800281 if (remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800282 !(t->dev->flags & IFF_UP))
283 continue;
284
stephen hemmingerd2083282012-09-24 18:12:23 +0000285 if (!ipgre_key_match(&t->parms, flags, key))
286 continue;
287
Timo Teras749c10f2009-01-19 17:22:12 -0800288 if (t->dev->type != ARPHRD_IPGRE &&
289 t->dev->type != dev_type)
290 continue;
291
Timo Terasafcf1242009-01-26 20:56:10 -0800292 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800293 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800294 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800295 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800296 score |= 2;
297 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800298 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800299
300 if (score < cand_score) {
301 cand = t;
302 cand_score = score;
303 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 }
Herbert Xue1a80002008-10-09 12:00:17 -0700305
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000306 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800307 if ((local != t->parms.iph.saddr &&
308 (local != t->parms.iph.daddr ||
309 !ipv4_is_multicast(local))) ||
Timo Teras749c10f2009-01-19 17:22:12 -0800310 !(t->dev->flags & IFF_UP))
311 continue;
312
stephen hemmingerd2083282012-09-24 18:12:23 +0000313 if (!ipgre_key_match(&t->parms, flags, key))
314 continue;
315
Timo Teras749c10f2009-01-19 17:22:12 -0800316 if (t->dev->type != ARPHRD_IPGRE &&
317 t->dev->type != dev_type)
318 continue;
319
Timo Terasafcf1242009-01-26 20:56:10 -0800320 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800321 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800322 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800323 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800324 score |= 2;
325 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800326 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800327
328 if (score < cand_score) {
329 cand = t;
330 cand_score = score;
331 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 }
Herbert Xue1a80002008-10-09 12:00:17 -0700333
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000334 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800335 if (t->parms.i_key != key ||
336 !(t->dev->flags & IFF_UP))
337 continue;
338
339 if (t->dev->type != ARPHRD_IPGRE &&
340 t->dev->type != dev_type)
341 continue;
342
Timo Terasafcf1242009-01-26 20:56:10 -0800343 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800344 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800345 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800346 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800347 score |= 2;
348 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800349 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800350
351 if (score < cand_score) {
352 cand = t;
353 cand_score = score;
354 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 }
356
Timo Terasafcf1242009-01-26 20:56:10 -0800357 if (cand != NULL)
358 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700359
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000360 dev = ign->fb_tunnel_dev;
361 if (dev->flags & IFF_UP)
362 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800363
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 return NULL;
365}
366
Eric Dumazet15078502010-09-15 11:07:53 +0000367static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700368 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900370 __be32 remote = parms->iph.daddr;
371 __be32 local = parms->iph.saddr;
372 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000373 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 int prio = 0;
375
376 if (local)
377 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800378 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 prio |= 2;
380 h ^= HASH(remote);
381 }
382
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700383 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384}
385
Eric Dumazet15078502010-09-15 11:07:53 +0000386static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700387 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900388{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700389 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900390}
391
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700392static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393{
Eric Dumazet15078502010-09-15 11:07:53 +0000394 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
Eric Dumazet15078502010-09-15 11:07:53 +0000396 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000397 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398}
399
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700400static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401{
Eric Dumazet15078502010-09-15 11:07:53 +0000402 struct ip_tunnel __rcu **tp;
403 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404
Eric Dumazet15078502010-09-15 11:07:53 +0000405 for (tp = ipgre_bucket(ign, t);
406 (iter = rtnl_dereference(*tp)) != NULL;
407 tp = &iter->next) {
408 if (t == iter) {
409 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 break;
411 }
412 }
413}
414
Herbert Xue1a80002008-10-09 12:00:17 -0700415static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
416 struct ip_tunnel_parm *parms,
417 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418{
Al Virod5a0a1e2006-11-08 00:23:14 -0800419 __be32 remote = parms->iph.daddr;
420 __be32 local = parms->iph.saddr;
421 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800422 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000423 struct ip_tunnel *t;
424 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700425 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
426
Eric Dumazet15078502010-09-15 11:07:53 +0000427 for (tp = __ipgre_bucket(ign, parms);
428 (t = rtnl_dereference(*tp)) != NULL;
429 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700430 if (local == t->parms.iph.saddr &&
431 remote == t->parms.iph.daddr &&
432 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800433 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700434 type == t->dev->type)
435 break;
436
437 return t;
438}
439
Eric Dumazet15078502010-09-15 11:07:53 +0000440static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700441 struct ip_tunnel_parm *parms, int create)
442{
443 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700446 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
Herbert Xue1a80002008-10-09 12:00:17 -0700448 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
449 if (t || !create)
450 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451
452 if (parms->name[0])
453 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800454 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000455 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
457 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
458 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000459 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700461 dev_net_set(dev, net);
462
Patrick McHardy2941a482006-01-08 22:05:26 -0800463 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700465 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
Herbert Xu42aa9162008-10-09 11:59:32 -0700467 dev->mtu = ipgre_tunnel_bind_dev(dev);
468
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800469 if (register_netdevice(dev) < 0)
470 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000472 /* Can use a lockless transmit, unless we generate output sequences */
473 if (!(nt->parms.o_flags & GRE_SEQ))
474 dev->features |= NETIF_F_LLTX;
475
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700477 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 return nt;
479
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800480failed_free:
481 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 return NULL;
483}
484
485static void ipgre_tunnel_uninit(struct net_device *dev)
486{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700487 struct net *net = dev_net(dev);
488 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
489
490 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 dev_put(dev);
492}
493
494
495static void ipgre_err(struct sk_buff *skb, u32 info)
496{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497
Rami Rosen071f92d2008-05-21 17:47:54 -0700498/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 8 bytes of packet payload. It means, that precise relaying of
500 ICMP in the real Internet is absolutely infeasible.
501
502 Moreover, Cisco "wise men" put GRE key to the third word
503 in GRE header. It makes impossible maintaining even soft state for keyed
504 GRE tunnels with enabled checksum. Tell them "thank you".
505
506 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000507 what the hell these idiots break standards established
508 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 */
510
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000511 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000512 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300514 const int type = icmp_hdr(skb)->type;
515 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800517 __be16 flags;
stephen hemmingerd2083282012-09-24 18:12:23 +0000518 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
520 flags = p[0];
521 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
522 if (flags&(GRE_VERSION|GRE_ROUTING))
523 return;
524 if (flags&GRE_KEY) {
525 grehlen += 4;
526 if (flags&GRE_CSUM)
527 grehlen += 4;
528 }
529 }
530
531 /* If only 8 bytes returned, keyed message will be dropped here */
532 if (skb_headlen(skb) < grehlen)
533 return;
534
stephen hemmingerd2083282012-09-24 18:12:23 +0000535 if (flags & GRE_KEY)
536 key = *(((__be32 *)p) + (grehlen / 4) - 1);
537
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 switch (type) {
539 default:
540 case ICMP_PARAMETERPROB:
541 return;
542
543 case ICMP_DEST_UNREACH:
544 switch (code) {
545 case ICMP_SR_FAILED:
546 case ICMP_PORT_UNREACH:
547 /* Impossible event. */
548 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 default:
550 /* All others are translated to HOST_UNREACH.
551 rfc2003 contains "deep thoughts" about NET_UNREACH,
552 I believe they are just ether pollution. --ANK
553 */
554 break;
555 }
556 break;
557 case ICMP_TIME_EXCEEDED:
558 if (code != ICMP_EXC_TTL)
559 return;
560 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700561
562 case ICMP_REDIRECT:
563 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 }
565
Timo Teras749c10f2009-01-19 17:22:12 -0800566 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
stephen hemmingerd2083282012-09-24 18:12:23 +0000567 flags, key, p[1]);
568
David S. Miller36393392012-06-14 22:21:46 -0700569 if (t == NULL)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000570 return;
David S. Miller36393392012-06-14 22:21:46 -0700571
572 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
573 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
574 t->parms.link, 0, IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000575 return;
David S. Miller36393392012-06-14 22:21:46 -0700576 }
David S. Miller55be7a92012-07-11 21:27:49 -0700577 if (type == ICMP_REDIRECT) {
578 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
579 IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000580 return;
David S. Miller55be7a92012-07-11 21:27:49 -0700581 }
David S. Miller36393392012-06-14 22:21:46 -0700582 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800583 ipv4_is_multicast(t->parms.iph.daddr))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000584 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585
586 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000587 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
Wei Yongjunda6185d82009-02-24 23:34:48 -0800589 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 t->err_count++;
591 else
592 t->err_count = 1;
593 t->err_time = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594}
595
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000597ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598{
599 u8 inner = 0;
600 if (skb->protocol == htons(ETH_P_IP))
601 inner = old_iph->tos;
602 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000603 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 return INET_ECN_encapsulate(tos, inner);
605}
606
607static int ipgre_rcv(struct sk_buff *skb)
608{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000609 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800611 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800612 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800613 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 u32 seqno = 0;
615 struct ip_tunnel *tunnel;
616 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700617 __be16 gre_proto;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000618 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619
620 if (!pskb_may_pull(skb, 16))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000621 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700623 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000625 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
627 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
628 /* - Version must be 0.
629 - We do not support routing headers.
630 */
631 if (flags&(GRE_VERSION|GRE_ROUTING))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000632 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800635 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700636 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800637 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800638 if (!csum)
639 break;
640 /* fall through */
641 case CHECKSUM_NONE:
642 skb->csum = 0;
643 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700644 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 }
646 offset += 4;
647 }
648 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000649 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 offset += 4;
651 }
652 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000653 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 offset += 4;
655 }
656 }
657
Herbert Xue1a80002008-10-09 12:00:17 -0700658 gre_proto = *(__be16 *)(h + 2);
659
stephen hemmingerd2083282012-09-24 18:12:23 +0000660 tunnel = ipgre_tunnel_lookup(skb->dev,
661 iph->saddr, iph->daddr, flags, key,
662 gre_proto);
663 if (tunnel) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000664 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700665
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 secpath_reset(skb);
667
Herbert Xue1a80002008-10-09 12:00:17 -0700668 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 /* WCCP version 1 and 2 protocol decoding.
670 * - Change protocol to IP
671 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
672 */
Herbert Xue1a80002008-10-09 12:00:17 -0700673 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700674 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900675 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 offset += 4;
677 }
678
Timo Teras1d069162007-12-20 00:10:33 -0800679 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300680 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700681 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 skb->pkt_type = PACKET_HOST;
683#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800684 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800686 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000688 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 skb->pkt_type = PACKET_BROADCAST;
690 }
691#endif
692
693 if (((flags&GRE_CSUM) && csum) ||
694 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000695 tunnel->dev->stats.rx_crc_errors++;
696 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 goto drop;
698 }
699 if (tunnel->parms.i_flags&GRE_SEQ) {
700 if (!(flags&GRE_SEQ) ||
701 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000702 tunnel->dev->stats.rx_fifo_errors++;
703 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 goto drop;
705 }
706 tunnel->i_seqno = seqno + 1;
707 }
Herbert Xue1a80002008-10-09 12:00:17 -0700708
709 /* Warning: All skb pointers will be invalidated! */
710 if (tunnel->dev->type == ARPHRD_ETHER) {
711 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000712 tunnel->dev->stats.rx_length_errors++;
713 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700714 goto drop;
715 }
716
717 iph = ip_hdr(skb);
718 skb->protocol = eth_type_trans(skb, tunnel->dev);
719 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
720 }
721
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000722 __skb_tunnel_rx(skb, tunnel->dev);
723
724 skb_reset_network_header(skb);
725 err = IP_ECN_decapsulate(iph, skb);
726 if (unlikely(err)) {
727 if (log_ecn_error)
728 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
729 &iph->saddr, iph->tos);
730 if (err > 1) {
731 ++tunnel->dev->stats.rx_frame_errors;
732 ++tunnel->dev->stats.rx_errors;
733 goto drop;
734 }
735 }
736
Eric Dumazete985aad2010-09-27 03:57:11 +0000737 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000738 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000739 tstats->rx_packets++;
740 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000741 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000742
Eric Dumazet60769a52012-09-27 02:48:50 +0000743 gro_cells_receive(&tunnel->gro_cells, skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000744 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700746 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
748drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000750 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751}
752
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000753static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754{
Patrick McHardy2941a482006-01-08 22:05:26 -0800755 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazete985aad2010-09-27 03:57:11 +0000756 struct pcpu_tstats *tstats;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000757 const struct iphdr *old_iph = ip_hdr(skb);
758 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700759 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800761 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000763 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700765 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800767 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 int mtu;
769
Eric Dumazet6b78f162012-09-13 21:25:33 +0000770 if (skb->ip_summed == CHECKSUM_PARTIAL &&
771 skb_checksum_help(skb))
772 goto tx_error;
773
Herbert Xue1a80002008-10-09 12:00:17 -0700774 if (dev->type == ARPHRD_ETHER)
775 IPCB(skb)->flags = 0;
776
777 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000779 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 } else {
781 gre_hlen = tunnel->hlen;
782 tiph = &tunnel->parms.iph;
783 }
784
785 if ((dst = tiph->daddr) == 0) {
786 /* NBMA tunnel */
787
Eric Dumazetadf30902009-06-02 05:19:30 +0000788 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000789 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 goto tx_error;
791 }
792
David S. Miller61d57f82012-01-24 18:23:30 -0500793 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000794 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700795 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500796 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000797#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000799 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800800 struct neighbour *neigh;
801 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
David S. Miller0ec88662012-01-27 15:01:08 -0800804 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 if (neigh == NULL)
806 goto tx_error;
807
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000808 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 addr_type = ipv6_addr_type(addr6);
810
811 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700812 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 addr_type = ipv6_addr_type(addr6);
814 }
815
816 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800817 do_tx_error_icmp = true;
818 else {
819 do_tx_error_icmp = false;
820 dst = addr6->s6_addr32[3];
821 }
822 neigh_release(neigh);
823 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 }
826#endif
827 else
828 goto tx_error;
829 }
830
831 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700832 if (tos == 1) {
833 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 if (skb->protocol == htons(ETH_P_IP))
835 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700836 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000837 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 }
839
David S. Millercbb1e852011-05-04 12:33:34 -0700840 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500841 tunnel->parms.o_key, RT_TOS(tos),
842 tunnel->parms.link);
843 if (IS_ERR(rt)) {
844 dev->stats.tx_carrier_errors++;
845 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700847 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848
849 if (tdev == dev) {
850 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000851 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 goto tx_error;
853 }
854
855 df = tiph->frag_off;
856 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700857 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000859 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
Eric Dumazetadf30902009-06-02 05:19:30 +0000861 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700862 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863
864 if (skb->protocol == htons(ETH_P_IP)) {
865 df |= (old_iph->frag_off&htons(IP_DF));
866
867 if ((old_iph->frag_off&htons(IP_DF)) &&
868 mtu < ntohs(old_iph->tot_len)) {
869 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
870 ip_rt_put(rt);
871 goto tx_error;
872 }
873 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000874#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000876 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877
Eric Dumazetadf30902009-06-02 05:19:30 +0000878 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800879 if ((tunnel->parms.iph.daddr &&
880 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 rt6->rt6i_dst.plen == 128) {
882 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800883 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 }
885 }
886
887 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000888 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 ip_rt_put(rt);
890 goto tx_error;
891 }
892 }
893#endif
894
895 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800896 if (time_before(jiffies,
897 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 tunnel->err_count--;
899
900 dst_link_failure(skb);
901 } else
902 tunnel->err_count = 0;
903 }
904
Changli Gaod8d1f302010-06-10 23:31:35 -0700905 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
Patrick McHardycfbba492007-07-09 15:33:40 -0700907 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
908 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000910 if (max_headroom > dev->needed_headroom)
911 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 if (!new_skb) {
913 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000914 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000916 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 }
918 if (skb->sk)
919 skb_set_owner_w(new_skb, skb->sk);
920 dev_kfree_skb(skb);
921 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700922 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 }
924
Herbert Xu64194c32008-10-09 12:03:17 -0700925 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700926 skb_push(skb, gre_hlen);
927 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800929 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
930 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000931 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700932 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933
934 /*
935 * Push down and install the IPIP header.
936 */
937
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700938 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 iph->version = 4;
940 iph->ihl = sizeof(struct iphdr) >> 2;
941 iph->frag_off = df;
942 iph->protocol = IPPROTO_GRE;
943 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700944 iph->daddr = fl4.daddr;
945 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
947 if ((iph->ttl = tiph->ttl) == 0) {
948 if (skb->protocol == htons(ETH_P_IP))
949 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000950#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000952 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953#endif
954 else
David S. Miller323e1262010-12-12 21:55:08 -0800955 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 }
957
Herbert Xue1a80002008-10-09 12:00:17 -0700958 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
959 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
960 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000963 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
965 if (tunnel->parms.o_flags&GRE_SEQ) {
966 ++tunnel->o_seqno;
967 *ptr = htonl(tunnel->o_seqno);
968 ptr--;
969 }
970 if (tunnel->parms.o_flags&GRE_KEY) {
971 *ptr = tunnel->parms.o_key;
972 ptr--;
973 }
974 if (tunnel->parms.o_flags&GRE_CSUM) {
975 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000976 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 }
978 }
979
980 nf_reset(skb);
Eric Dumazete985aad2010-09-27 03:57:11 +0000981 tstats = this_cpu_ptr(dev->tstats);
982 __IPTUNNEL_XMIT(tstats, &dev->stats);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000983 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
David S. Miller496053f2012-01-11 16:46:32 -0800985#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986tx_error_icmp:
987 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800988#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000990 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000992 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993}
994
Herbert Xu42aa9162008-10-09 11:59:32 -0700995static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800996{
997 struct net_device *tdev = NULL;
998 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000999 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001000 int hlen = LL_MAX_HEADER;
1001 int mtu = ETH_DATA_LEN;
1002 int addend = sizeof(struct iphdr) + 4;
1003
1004 tunnel = netdev_priv(dev);
1005 iph = &tunnel->parms.iph;
1006
Herbert Xuc95b8192008-10-09 11:58:54 -07001007 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001008
1009 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -07001010 struct flowi4 fl4;
1011 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001012
David S. Millercbb1e852011-05-04 12:33:34 -07001013 rt = ip_route_output_gre(dev_net(dev), &fl4,
1014 iph->daddr, iph->saddr,
1015 tunnel->parms.o_key,
1016 RT_TOS(iph->tos),
1017 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001018 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001019 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001020 ip_rt_put(rt);
1021 }
Herbert Xue1a80002008-10-09 12:00:17 -07001022
1023 if (dev->type != ARPHRD_ETHER)
1024 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001025 }
1026
1027 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001028 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001029
1030 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001031 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001032 mtu = tdev->mtu;
1033 }
1034 dev->iflink = tunnel->parms.link;
1035
1036 /* Precalculate GRE options length */
1037 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1038 if (tunnel->parms.o_flags&GRE_CSUM)
1039 addend += 4;
1040 if (tunnel->parms.o_flags&GRE_KEY)
1041 addend += 4;
1042 if (tunnel->parms.o_flags&GRE_SEQ)
1043 addend += 4;
1044 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001045 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001046 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001047
1048 if (mtu < 68)
1049 mtu = 68;
1050
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001051 tunnel->hlen = addend;
1052
Herbert Xu42aa9162008-10-09 11:59:32 -07001053 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001054}
1055
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056static int
1057ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1058{
1059 int err = 0;
1060 struct ip_tunnel_parm p;
1061 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001062 struct net *net = dev_net(dev);
1063 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064
1065 switch (cmd) {
1066 case SIOCGETTUNNEL:
1067 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001068 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1070 err = -EFAULT;
1071 break;
1072 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001073 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074 }
1075 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001076 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 memcpy(&p, &t->parms, sizeof(p));
1078 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1079 err = -EFAULT;
1080 break;
1081
1082 case SIOCADDTUNNEL:
1083 case SIOCCHGTUNNEL:
1084 err = -EPERM;
1085 if (!capable(CAP_NET_ADMIN))
1086 goto done;
1087
1088 err = -EFAULT;
1089 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1090 goto done;
1091
1092 err = -EINVAL;
1093 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1094 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1095 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1096 goto done;
1097 if (p.iph.ttl)
1098 p.iph.frag_off |= htons(IP_DF);
1099
1100 if (!(p.i_flags&GRE_KEY))
1101 p.i_key = 0;
1102 if (!(p.o_flags&GRE_KEY))
1103 p.o_key = 0;
1104
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001105 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001107 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108 if (t != NULL) {
1109 if (t->dev != dev) {
1110 err = -EEXIST;
1111 break;
1112 }
1113 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001114 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115
Patrick McHardy2941a482006-01-08 22:05:26 -08001116 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117
Joe Perchesf97c1e02007-12-16 13:45:43 -08001118 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 nflags = IFF_BROADCAST;
1120 else if (p.iph.daddr)
1121 nflags = IFF_POINTOPOINT;
1122
1123 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1124 err = -EINVAL;
1125 break;
1126 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001127 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001128 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 t->parms.iph.saddr = p.iph.saddr;
1130 t->parms.iph.daddr = p.iph.daddr;
1131 t->parms.i_key = p.i_key;
1132 t->parms.o_key = p.o_key;
1133 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1134 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001135 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 netdev_state_change(dev);
1137 }
1138 }
1139
1140 if (t) {
1141 err = 0;
1142 if (cmd == SIOCCHGTUNNEL) {
1143 t->parms.iph.ttl = p.iph.ttl;
1144 t->parms.iph.tos = p.iph.tos;
1145 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001146 if (t->parms.link != p.link) {
1147 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001148 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001149 netdev_state_change(dev);
1150 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 }
1152 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1153 err = -EFAULT;
1154 } else
1155 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1156 break;
1157
1158 case SIOCDELTUNNEL:
1159 err = -EPERM;
1160 if (!capable(CAP_NET_ADMIN))
1161 goto done;
1162
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001163 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 err = -EFAULT;
1165 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1166 goto done;
1167 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001168 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 goto done;
1170 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001171 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 goto done;
1173 dev = t->dev;
1174 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001175 unregister_netdevice(dev);
1176 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 break;
1178
1179 default:
1180 err = -EINVAL;
1181 }
1182
1183done:
1184 return err;
1185}
1186
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1188{
Patrick McHardy2941a482006-01-08 22:05:26 -08001189 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001190 if (new_mtu < 68 ||
1191 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 return -EINVAL;
1193 dev->mtu = new_mtu;
1194 return 0;
1195}
1196
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1225
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001226static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1227 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001228 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229{
Patrick McHardy2941a482006-01-08 22:05:26 -08001230 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001232 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1235 p[0] = t->parms.o_flags;
1236 p[1] = htons(type);
1237
1238 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001239 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001241
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 if (saddr)
1243 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001244 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001246 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001248
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 return -t->hlen;
1250}
1251
Timo Teras6a5f44d2007-10-23 20:31:53 -07001252static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1253{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001254 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001255 memcpy(haddr, &iph->saddr, 4);
1256 return 4;
1257}
1258
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001259static const struct header_ops ipgre_header_ops = {
1260 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001261 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001262};
1263
Timo Teras6a5f44d2007-10-23 20:31:53 -07001264#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265static int ipgre_open(struct net_device *dev)
1266{
Patrick McHardy2941a482006-01-08 22:05:26 -08001267 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Joe Perchesf97c1e02007-12-16 13:45:43 -08001269 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001270 struct flowi4 fl4;
1271 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001272
David S. Millercbb1e852011-05-04 12:33:34 -07001273 rt = ip_route_output_gre(dev_net(dev), &fl4,
1274 t->parms.iph.daddr,
1275 t->parms.iph.saddr,
1276 t->parms.o_key,
1277 RT_TOS(t->parms.iph.tos),
1278 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001279 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001281 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001283 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 return -EADDRNOTAVAIL;
1285 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001286 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 }
1288 return 0;
1289}
1290
1291static int ipgre_close(struct net_device *dev)
1292{
Patrick McHardy2941a482006-01-08 22:05:26 -08001293 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001294
Joe Perchesf97c1e02007-12-16 13:45:43 -08001295 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001296 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001297 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001298 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 }
1301 return 0;
1302}
1303
1304#endif
1305
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001306static const struct net_device_ops ipgre_netdev_ops = {
1307 .ndo_init = ipgre_tunnel_init,
1308 .ndo_uninit = ipgre_tunnel_uninit,
1309#ifdef CONFIG_NET_IPGRE_BROADCAST
1310 .ndo_open = ipgre_open,
1311 .ndo_stop = ipgre_close,
1312#endif
1313 .ndo_start_xmit = ipgre_tunnel_xmit,
1314 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1315 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001316 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001317};
1318
Eric Dumazete985aad2010-09-27 03:57:11 +00001319static void ipgre_dev_free(struct net_device *dev)
1320{
Eric Dumazet60769a52012-09-27 02:48:50 +00001321 struct ip_tunnel *tunnel = netdev_priv(dev);
1322
1323 gro_cells_destroy(&tunnel->gro_cells);
Eric Dumazete985aad2010-09-27 03:57:11 +00001324 free_percpu(dev->tstats);
1325 free_netdev(dev);
1326}
1327
/* Offload feature bits set in both features and hw_features of tunnel devices. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1332
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333static void ipgre_tunnel_setup(struct net_device *dev)
1334{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001335 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001336 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
1338 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001339 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001340 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 dev->flags = IFF_NOARP;
1342 dev->iflink = 0;
1343 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001344 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001345 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001346
1347 dev->features |= GRE_FEATURES;
1348 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349}
1350
1351static int ipgre_tunnel_init(struct net_device *dev)
1352{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353 struct ip_tunnel *tunnel;
1354 struct iphdr *iph;
Eric Dumazet60769a52012-09-27 02:48:50 +00001355 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356
Patrick McHardy2941a482006-01-08 22:05:26 -08001357 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 iph = &tunnel->parms.iph;
1359
1360 tunnel->dev = dev;
1361 strcpy(tunnel->parms.name, dev->name);
1362
1363 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1364 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1365
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001368 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 if (!iph->saddr)
1370 return -EINVAL;
1371 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001372 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 }
1374#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001375 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001376 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377
Eric Dumazete985aad2010-09-27 03:57:11 +00001378 dev->tstats = alloc_percpu(struct pcpu_tstats);
1379 if (!dev->tstats)
1380 return -ENOMEM;
1381
Eric Dumazet60769a52012-09-27 02:48:50 +00001382 err = gro_cells_init(&tunnel->gro_cells, dev);
1383 if (err) {
1384 free_percpu(dev->tstats);
1385 return err;
1386 }
1387
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 return 0;
1389}
1390
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001391static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392{
Patrick McHardy2941a482006-01-08 22:05:26 -08001393 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 struct iphdr *iph = &tunnel->parms.iph;
1395
1396 tunnel->dev = dev;
1397 strcpy(tunnel->parms.name, dev->name);
1398
1399 iph->version = 4;
1400 iph->protocol = IPPROTO_GRE;
1401 iph->ihl = 5;
1402 tunnel->hlen = sizeof(struct iphdr) + 4;
1403
1404 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405}
1406
1407
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001408static const struct gre_protocol ipgre_protocol = {
1409 .handler = ipgre_rcv,
1410 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411};
1412
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001413static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001414{
1415 int prio;
1416
1417 for (prio = 0; prio < 4; prio++) {
1418 int h;
1419 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001420 struct ip_tunnel *t;
1421
1422 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001423
1424 while (t != NULL) {
1425 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001426 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001427 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001428 }
1429 }
1430}
1431
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001432static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001433{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001434 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001435 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001436
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001437 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1438 ipgre_tunnel_setup);
1439 if (!ign->fb_tunnel_dev) {
1440 err = -ENOMEM;
1441 goto err_alloc_dev;
1442 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001443 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001444
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001445 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001446 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001447
1448 if ((err = register_netdev(ign->fb_tunnel_dev)))
1449 goto err_reg_dev;
1450
Eric Dumazet3285ee32010-10-30 16:21:28 -07001451 rcu_assign_pointer(ign->tunnels_wc[0],
1452 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001453 return 0;
1454
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001455err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001456 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001457err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001458 return err;
1459}
1460
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001461static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001462{
1463 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001464 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001465
1466 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001467 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001468 ipgre_destroy_tunnels(ign, &list);
1469 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001470 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001471}
1472
1473static struct pernet_operations ipgre_net_ops = {
1474 .init = ipgre_init_net,
1475 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001476 .id = &ipgre_net_id,
1477 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001478};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479
Herbert Xuc19e6542008-10-09 11:59:55 -07001480static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1481{
1482 __be16 flags;
1483
1484 if (!data)
1485 return 0;
1486
1487 flags = 0;
1488 if (data[IFLA_GRE_IFLAGS])
1489 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1490 if (data[IFLA_GRE_OFLAGS])
1491 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1492 if (flags & (GRE_VERSION|GRE_ROUTING))
1493 return -EINVAL;
1494
1495 return 0;
1496}
1497
Herbert Xue1a80002008-10-09 12:00:17 -07001498static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1499{
1500 __be32 daddr;
1501
1502 if (tb[IFLA_ADDRESS]) {
1503 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1504 return -EINVAL;
1505 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1506 return -EADDRNOTAVAIL;
1507 }
1508
1509 if (!data)
1510 goto out;
1511
1512 if (data[IFLA_GRE_REMOTE]) {
1513 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1514 if (!daddr)
1515 return -EINVAL;
1516 }
1517
1518out:
1519 return ipgre_tunnel_validate(tb, data);
1520}
1521
Herbert Xuc19e6542008-10-09 11:59:55 -07001522static void ipgre_netlink_parms(struct nlattr *data[],
1523 struct ip_tunnel_parm *parms)
1524{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001525 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001526
1527 parms->iph.protocol = IPPROTO_GRE;
1528
1529 if (!data)
1530 return;
1531
1532 if (data[IFLA_GRE_LINK])
1533 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1534
1535 if (data[IFLA_GRE_IFLAGS])
1536 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1537
1538 if (data[IFLA_GRE_OFLAGS])
1539 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1540
1541 if (data[IFLA_GRE_IKEY])
1542 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1543
1544 if (data[IFLA_GRE_OKEY])
1545 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1546
1547 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001548 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001549
1550 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001551 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001552
1553 if (data[IFLA_GRE_TTL])
1554 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1555
1556 if (data[IFLA_GRE_TOS])
1557 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1558
1559 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1560 parms->iph.frag_off = htons(IP_DF);
1561}
1562
Herbert Xue1a80002008-10-09 12:00:17 -07001563static int ipgre_tap_init(struct net_device *dev)
1564{
1565 struct ip_tunnel *tunnel;
1566
1567 tunnel = netdev_priv(dev);
1568
1569 tunnel->dev = dev;
1570 strcpy(tunnel->parms.name, dev->name);
1571
1572 ipgre_tunnel_bind_dev(dev);
1573
Eric Dumazete985aad2010-09-27 03:57:11 +00001574 dev->tstats = alloc_percpu(struct pcpu_tstats);
1575 if (!dev->tstats)
1576 return -ENOMEM;
1577
Herbert Xue1a80002008-10-09 12:00:17 -07001578 return 0;
1579}
1580
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001581static const struct net_device_ops ipgre_tap_netdev_ops = {
1582 .ndo_init = ipgre_tap_init,
1583 .ndo_uninit = ipgre_tunnel_uninit,
1584 .ndo_start_xmit = ipgre_tunnel_xmit,
1585 .ndo_set_mac_address = eth_mac_addr,
1586 .ndo_validate_addr = eth_validate_addr,
1587 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001588 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001589};
1590
Herbert Xue1a80002008-10-09 12:00:17 -07001591static void ipgre_tap_setup(struct net_device *dev)
1592{
1593
1594 ether_setup(dev);
1595
Herbert Xu2e9526b2009-10-30 05:51:48 +00001596 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001597 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001598
1599 dev->iflink = 0;
1600 dev->features |= NETIF_F_NETNS_LOCAL;
1601}
1602
Eric W. Biederman81adee42009-11-08 00:53:51 -08001603static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001604 struct nlattr *data[])
1605{
1606 struct ip_tunnel *nt;
1607 struct net *net = dev_net(dev);
1608 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1609 int mtu;
1610 int err;
1611
1612 nt = netdev_priv(dev);
1613 ipgre_netlink_parms(data, &nt->parms);
1614
Herbert Xue1a80002008-10-09 12:00:17 -07001615 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001616 return -EEXIST;
1617
Herbert Xue1a80002008-10-09 12:00:17 -07001618 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001619 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001620
Herbert Xuc19e6542008-10-09 11:59:55 -07001621 mtu = ipgre_tunnel_bind_dev(dev);
1622 if (!tb[IFLA_MTU])
1623 dev->mtu = mtu;
1624
Eric Dumazetb790e012010-09-27 23:05:47 +00001625 /* Can use a lockless transmit, unless we generate output sequences */
1626 if (!(nt->parms.o_flags & GRE_SEQ))
1627 dev->features |= NETIF_F_LLTX;
1628
Herbert Xuc19e6542008-10-09 11:59:55 -07001629 err = register_netdevice(dev);
1630 if (err)
1631 goto out;
1632
1633 dev_hold(dev);
1634 ipgre_tunnel_link(ign, nt);
1635
1636out:
1637 return err;
1638}
1639
1640static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1641 struct nlattr *data[])
1642{
1643 struct ip_tunnel *t, *nt;
1644 struct net *net = dev_net(dev);
1645 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1646 struct ip_tunnel_parm p;
1647 int mtu;
1648
1649 if (dev == ign->fb_tunnel_dev)
1650 return -EINVAL;
1651
1652 nt = netdev_priv(dev);
1653 ipgre_netlink_parms(data, &p);
1654
1655 t = ipgre_tunnel_locate(net, &p, 0);
1656
1657 if (t) {
1658 if (t->dev != dev)
1659 return -EEXIST;
1660 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001661 t = nt;
1662
Herbert Xu2e9526b2009-10-30 05:51:48 +00001663 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001664 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001665
Herbert Xu2e9526b2009-10-30 05:51:48 +00001666 if (ipv4_is_multicast(p.iph.daddr))
1667 nflags = IFF_BROADCAST;
1668 else if (p.iph.daddr)
1669 nflags = IFF_POINTOPOINT;
1670
1671 if ((dev->flags ^ nflags) &
1672 (IFF_POINTOPOINT | IFF_BROADCAST))
1673 return -EINVAL;
1674 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001675
1676 ipgre_tunnel_unlink(ign, t);
1677 t->parms.iph.saddr = p.iph.saddr;
1678 t->parms.iph.daddr = p.iph.daddr;
1679 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001680 if (dev->type != ARPHRD_ETHER) {
1681 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1682 memcpy(dev->broadcast, &p.iph.daddr, 4);
1683 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001684 ipgre_tunnel_link(ign, t);
1685 netdev_state_change(dev);
1686 }
1687
1688 t->parms.o_key = p.o_key;
1689 t->parms.iph.ttl = p.iph.ttl;
1690 t->parms.iph.tos = p.iph.tos;
1691 t->parms.iph.frag_off = p.iph.frag_off;
1692
1693 if (t->parms.link != p.link) {
1694 t->parms.link = p.link;
1695 mtu = ipgre_tunnel_bind_dev(dev);
1696 if (!tb[IFLA_MTU])
1697 dev->mtu = mtu;
1698 netdev_state_change(dev);
1699 }
1700
1701 return 0;
1702}
1703
1704static size_t ipgre_get_size(const struct net_device *dev)
1705{
1706 return
1707 /* IFLA_GRE_LINK */
1708 nla_total_size(4) +
1709 /* IFLA_GRE_IFLAGS */
1710 nla_total_size(2) +
1711 /* IFLA_GRE_OFLAGS */
1712 nla_total_size(2) +
1713 /* IFLA_GRE_IKEY */
1714 nla_total_size(4) +
1715 /* IFLA_GRE_OKEY */
1716 nla_total_size(4) +
1717 /* IFLA_GRE_LOCAL */
1718 nla_total_size(4) +
1719 /* IFLA_GRE_REMOTE */
1720 nla_total_size(4) +
1721 /* IFLA_GRE_TTL */
1722 nla_total_size(1) +
1723 /* IFLA_GRE_TOS */
1724 nla_total_size(1) +
1725 /* IFLA_GRE_PMTUDISC */
1726 nla_total_size(1) +
1727 0;
1728}
1729
1730static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1731{
1732 struct ip_tunnel *t = netdev_priv(dev);
1733 struct ip_tunnel_parm *p = &t->parms;
1734
David S. Millerf3756b72012-04-01 20:39:02 -04001735 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1736 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1737 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1738 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1739 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1740 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1741 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1742 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1743 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1744 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1745 !!(p->iph.frag_off & htons(IP_DF))))
1746 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001747 return 0;
1748
1749nla_put_failure:
1750 return -EMSGSIZE;
1751}
1752
1753static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1754 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1755 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1756 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1757 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1758 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001759 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1760 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001761 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1762 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1763 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1764};
1765
1766static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1767 .kind = "gre",
1768 .maxtype = IFLA_GRE_MAX,
1769 .policy = ipgre_policy,
1770 .priv_size = sizeof(struct ip_tunnel),
1771 .setup = ipgre_tunnel_setup,
1772 .validate = ipgre_tunnel_validate,
1773 .newlink = ipgre_newlink,
1774 .changelink = ipgre_changelink,
1775 .get_size = ipgre_get_size,
1776 .fill_info = ipgre_fill_info,
1777};
1778
Herbert Xue1a80002008-10-09 12:00:17 -07001779static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1780 .kind = "gretap",
1781 .maxtype = IFLA_GRE_MAX,
1782 .policy = ipgre_policy,
1783 .priv_size = sizeof(struct ip_tunnel),
1784 .setup = ipgre_tap_setup,
1785 .validate = ipgre_tap_validate,
1786 .newlink = ipgre_newlink,
1787 .changelink = ipgre_changelink,
1788 .get_size = ipgre_get_size,
1789 .fill_info = ipgre_fill_info,
1790};
1791
/*
 *	And now the module code and kernel interface.
 */
1795
1796static int __init ipgre_init(void)
1797{
1798 int err;
1799
Joe Perches058bd4d2012-03-11 18:36:11 +00001800 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001802 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001803 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001804 return err;
1805
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001806 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001807 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001808 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001809 goto add_proto_failed;
1810 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001811
Herbert Xuc19e6542008-10-09 11:59:55 -07001812 err = rtnl_link_register(&ipgre_link_ops);
1813 if (err < 0)
1814 goto rtnl_link_failed;
1815
Herbert Xue1a80002008-10-09 12:00:17 -07001816 err = rtnl_link_register(&ipgre_tap_ops);
1817 if (err < 0)
1818 goto tap_ops_failed;
1819
Herbert Xuc19e6542008-10-09 11:59:55 -07001820out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001822
Herbert Xue1a80002008-10-09 12:00:17 -07001823tap_ops_failed:
1824 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001825rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001826 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001827add_proto_failed:
1828 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001829 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830}
1831
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001832static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833{
Herbert Xue1a80002008-10-09 12:00:17 -07001834 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001835 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001836 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001837 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001838 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839}
1840
/* Module entry and exit points. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Aliases for the two rtnetlink link kinds and the fallback device name. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");