blob: 0d4c3832d490c7b17ae04a54cf8c4cbae1d02b54 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet6d0722a2010-09-29 23:35:10 -070069 and silently drop packet when it expires. It is a good
stephen hemmingerbff52852012-02-24 08:08:20 +000070 solution, but it supposes maintaining new variable in ALL
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 skb, even if no tunneling is used.
72
Eric Dumazet6d0722a2010-09-29 23:35:10 -070073 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
stephen hemmingerbff52852012-02-24 08:08:20 +0000103 rapidly degrades to value <68, where looping stops.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
Herbert Xuc19e6542008-10-09 11:59:55 -0700123static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ipgre_tunnel_init(struct net_device *dev);
125static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700126static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
128/* Fallback tunnel: no source, no destination, no key, no options */
129
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130#define HASH_SIZE 16
131
Eric Dumazetf99189b2009-11-17 10:42:49 +0000132static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700136 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137};
138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139/* Tunnel hash table */
140
141/*
142 4 hash tables:
143
144 3: (remote,local)
145 2: (remote,*)
146 1: (*,local)
147 0: (*,*)
148
149 We require exact key match i.e. if a key is present in packet
150 it will match only tunnel with the same key; if it is not present,
151 it will match only keyless tunnel.
152
   All keyless packets, if not matched configured keyless tunnels
154 will match fallback tunnel.
155 */
156
/*
 * Fold a 32-bit address or key into a hash bucket index.
 *
 * The argument is fully parenthesized so that an expression argument
 * (e.g. HASH(a ^ b)) is cast and shifted as a whole, and the mask is
 * derived from HASH_SIZE rather than a separate hard-coded 0xF.
 * HASH_SIZE must be a power of two for the mask to be valid.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Short names for the four tables, by the address parts they are keyed on. */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain starting at @start.  The cursor is not a macro
 * argument: the caller must have a local 'struct ip_tunnel *t' in scope.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Eric Dumazete985aad2010-09-27 03:57:11 +0000170/* often modified stats are per cpu, other are shared (netdev->stats) */
171struct pcpu_tstats {
stephen hemminger87b6d212012-04-12 06:31:16 +0000172 u64 rx_packets;
173 u64 rx_bytes;
174 u64 tx_packets;
175 u64 tx_bytes;
176 struct u64_stats_sync syncp;
177};
Eric Dumazete985aad2010-09-27 03:57:11 +0000178
stephen hemminger87b6d212012-04-12 06:31:16 +0000179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000181{
Eric Dumazete985aad2010-09-27 03:57:11 +0000182 int i;
183
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
187 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000188
stephen hemminger87b6d212012-04-12 06:31:16 +0000189 do {
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
196
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000201 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000202
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
213
214 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000215}
216
stephen hemmingerd2083282012-09-24 18:12:23 +0000217/* Does key in tunnel parameters match packet */
218static bool ipgre_key_match(const struct ip_tunnel_parm *p,
219 __u32 flags, __be32 key)
220{
221 if (p->i_flags & GRE_KEY) {
222 if (flags & GRE_KEY)
223 return key == p->i_key;
224 else
225 return false; /* key expected, none present */
226 } else
227 return !(flags & GRE_KEY);
228}
229
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230/* Given src, dst and key, find appropriate for input tunnel. */
231
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000232static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
233 __be32 remote, __be32 local,
stephen hemmingerd2083282012-09-24 18:12:23 +0000234 __u32 flags, __be32 key,
235 __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236{
Timo Teras749c10f2009-01-19 17:22:12 -0800237 struct net *net = dev_net(dev);
238 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000239 unsigned int h0 = HASH(remote);
240 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800241 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700242 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700243 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
244 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800245 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000247 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800248 if (local != t->parms.iph.saddr ||
249 remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800250 !(t->dev->flags & IFF_UP))
251 continue;
252
stephen hemmingerd2083282012-09-24 18:12:23 +0000253 if (!ipgre_key_match(&t->parms, flags, key))
254 continue;
255
Timo Teras749c10f2009-01-19 17:22:12 -0800256 if (t->dev->type != ARPHRD_IPGRE &&
257 t->dev->type != dev_type)
258 continue;
259
Timo Terasafcf1242009-01-26 20:56:10 -0800260 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800261 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800262 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800263 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800264 score |= 2;
265 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800266 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800267
268 if (score < cand_score) {
269 cand = t;
270 cand_score = score;
271 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
Herbert Xue1a80002008-10-09 12:00:17 -0700273
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000274 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800275 if (remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800276 !(t->dev->flags & IFF_UP))
277 continue;
278
stephen hemmingerd2083282012-09-24 18:12:23 +0000279 if (!ipgre_key_match(&t->parms, flags, key))
280 continue;
281
Timo Teras749c10f2009-01-19 17:22:12 -0800282 if (t->dev->type != ARPHRD_IPGRE &&
283 t->dev->type != dev_type)
284 continue;
285
Timo Terasafcf1242009-01-26 20:56:10 -0800286 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800287 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800288 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800289 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800290 score |= 2;
291 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800292 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800293
294 if (score < cand_score) {
295 cand = t;
296 cand_score = score;
297 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 }
Herbert Xue1a80002008-10-09 12:00:17 -0700299
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000300 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800301 if ((local != t->parms.iph.saddr &&
302 (local != t->parms.iph.daddr ||
303 !ipv4_is_multicast(local))) ||
Timo Teras749c10f2009-01-19 17:22:12 -0800304 !(t->dev->flags & IFF_UP))
305 continue;
306
stephen hemmingerd2083282012-09-24 18:12:23 +0000307 if (!ipgre_key_match(&t->parms, flags, key))
308 continue;
309
Timo Teras749c10f2009-01-19 17:22:12 -0800310 if (t->dev->type != ARPHRD_IPGRE &&
311 t->dev->type != dev_type)
312 continue;
313
Timo Terasafcf1242009-01-26 20:56:10 -0800314 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800315 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800316 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800317 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800318 score |= 2;
319 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800320 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800321
322 if (score < cand_score) {
323 cand = t;
324 cand_score = score;
325 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 }
Herbert Xue1a80002008-10-09 12:00:17 -0700327
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000328 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800329 if (t->parms.i_key != key ||
330 !(t->dev->flags & IFF_UP))
331 continue;
332
333 if (t->dev->type != ARPHRD_IPGRE &&
334 t->dev->type != dev_type)
335 continue;
336
Timo Terasafcf1242009-01-26 20:56:10 -0800337 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800338 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800339 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800340 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800341 score |= 2;
342 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800343 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800344
345 if (score < cand_score) {
346 cand = t;
347 cand_score = score;
348 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 }
350
Timo Terasafcf1242009-01-26 20:56:10 -0800351 if (cand != NULL)
352 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700353
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000354 dev = ign->fb_tunnel_dev;
355 if (dev->flags & IFF_UP)
356 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800357
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 return NULL;
359}
360
Eric Dumazet15078502010-09-15 11:07:53 +0000361static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700362 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900364 __be32 remote = parms->iph.daddr;
365 __be32 local = parms->iph.saddr;
366 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000367 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 int prio = 0;
369
370 if (local)
371 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800372 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 prio |= 2;
374 h ^= HASH(remote);
375 }
376
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700377 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378}
379
Eric Dumazet15078502010-09-15 11:07:53 +0000380static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700381 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900382{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700383 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900384}
385
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700386static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387{
Eric Dumazet15078502010-09-15 11:07:53 +0000388 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
Eric Dumazet15078502010-09-15 11:07:53 +0000390 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000391 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392}
393
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700394static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395{
Eric Dumazet15078502010-09-15 11:07:53 +0000396 struct ip_tunnel __rcu **tp;
397 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
Eric Dumazet15078502010-09-15 11:07:53 +0000399 for (tp = ipgre_bucket(ign, t);
400 (iter = rtnl_dereference(*tp)) != NULL;
401 tp = &iter->next) {
402 if (t == iter) {
403 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 break;
405 }
406 }
407}
408
Herbert Xue1a80002008-10-09 12:00:17 -0700409static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
410 struct ip_tunnel_parm *parms,
411 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412{
Al Virod5a0a1e2006-11-08 00:23:14 -0800413 __be32 remote = parms->iph.daddr;
414 __be32 local = parms->iph.saddr;
415 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800416 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000417 struct ip_tunnel *t;
418 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700419 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
420
Eric Dumazet15078502010-09-15 11:07:53 +0000421 for (tp = __ipgre_bucket(ign, parms);
422 (t = rtnl_dereference(*tp)) != NULL;
423 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700424 if (local == t->parms.iph.saddr &&
425 remote == t->parms.iph.daddr &&
426 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800427 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700428 type == t->dev->type)
429 break;
430
431 return t;
432}
433
Eric Dumazet15078502010-09-15 11:07:53 +0000434static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700435 struct ip_tunnel_parm *parms, int create)
436{
437 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700440 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
Herbert Xue1a80002008-10-09 12:00:17 -0700442 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
443 if (t || !create)
444 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
446 if (parms->name[0])
447 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800448 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000449 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
451 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
452 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000453 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700455 dev_net_set(dev, net);
456
Patrick McHardy2941a482006-01-08 22:05:26 -0800457 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700459 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
Herbert Xu42aa9162008-10-09 11:59:32 -0700461 dev->mtu = ipgre_tunnel_bind_dev(dev);
462
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800463 if (register_netdevice(dev) < 0)
464 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000466 /* Can use a lockless transmit, unless we generate output sequences */
467 if (!(nt->parms.o_flags & GRE_SEQ))
468 dev->features |= NETIF_F_LLTX;
469
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700471 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 return nt;
473
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800474failed_free:
475 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 return NULL;
477}
478
479static void ipgre_tunnel_uninit(struct net_device *dev)
480{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700481 struct net *net = dev_net(dev);
482 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
483
484 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 dev_put(dev);
486}
487
488
489static void ipgre_err(struct sk_buff *skb, u32 info)
490{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491
Rami Rosen071f92d2008-05-21 17:47:54 -0700492/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 8 bytes of packet payload. It means, that precise relaying of
494 ICMP in the real Internet is absolutely infeasible.
495
496 Moreover, Cisco "wise men" put GRE key to the third word
497 in GRE header. It makes impossible maintaining even soft state for keyed
498 GRE tunnels with enabled checksum. Tell them "thank you".
499
500 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000501 what the hell these idiots break standards established
502 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 */
504
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000505 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000506 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300508 const int type = icmp_hdr(skb)->type;
509 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800511 __be16 flags;
stephen hemmingerd2083282012-09-24 18:12:23 +0000512 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513
514 flags = p[0];
515 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
516 if (flags&(GRE_VERSION|GRE_ROUTING))
517 return;
518 if (flags&GRE_KEY) {
519 grehlen += 4;
520 if (flags&GRE_CSUM)
521 grehlen += 4;
522 }
523 }
524
525 /* If only 8 bytes returned, keyed message will be dropped here */
526 if (skb_headlen(skb) < grehlen)
527 return;
528
stephen hemmingerd2083282012-09-24 18:12:23 +0000529 if (flags & GRE_KEY)
530 key = *(((__be32 *)p) + (grehlen / 4) - 1);
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 switch (type) {
533 default:
534 case ICMP_PARAMETERPROB:
535 return;
536
537 case ICMP_DEST_UNREACH:
538 switch (code) {
539 case ICMP_SR_FAILED:
540 case ICMP_PORT_UNREACH:
541 /* Impossible event. */
542 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 default:
544 /* All others are translated to HOST_UNREACH.
545 rfc2003 contains "deep thoughts" about NET_UNREACH,
546 I believe they are just ether pollution. --ANK
547 */
548 break;
549 }
550 break;
551 case ICMP_TIME_EXCEEDED:
552 if (code != ICMP_EXC_TTL)
553 return;
554 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700555
556 case ICMP_REDIRECT:
557 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 }
559
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000560 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800561 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
stephen hemmingerd2083282012-09-24 18:12:23 +0000562 flags, key, p[1]);
563
David S. Miller36393392012-06-14 22:21:46 -0700564 if (t == NULL)
565 goto out;
566
567 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
568 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
569 t->parms.link, 0, IPPROTO_GRE, 0);
570 goto out;
571 }
David S. Miller55be7a92012-07-11 21:27:49 -0700572 if (type == ICMP_REDIRECT) {
573 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
574 IPPROTO_GRE, 0);
575 goto out;
576 }
David S. Miller36393392012-06-14 22:21:46 -0700577 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800578 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 goto out;
580
581 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
582 goto out;
583
Wei Yongjunda6185d82009-02-24 23:34:48 -0800584 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 t->err_count++;
586 else
587 t->err_count = 1;
588 t->err_time = jiffies;
589out:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000590 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591}
592
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000593static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
595 if (INET_ECN_is_ce(iph->tos)) {
596 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700597 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700599 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 }
601 }
602}
603
604static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000605ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606{
607 u8 inner = 0;
608 if (skb->protocol == htons(ETH_P_IP))
609 inner = old_iph->tos;
610 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000611 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 return INET_ECN_encapsulate(tos, inner);
613}
614
615static int ipgre_rcv(struct sk_buff *skb)
616{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000617 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800619 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800620 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800621 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 u32 seqno = 0;
623 struct ip_tunnel *tunnel;
624 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700625 __be16 gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
627 if (!pskb_may_pull(skb, 16))
628 goto drop_nolock;
629
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700630 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000632 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
635 /* - Version must be 0.
636 - We do not support routing headers.
637 */
638 if (flags&(GRE_VERSION|GRE_ROUTING))
639 goto drop_nolock;
640
641 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800642 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700643 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800644 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800645 if (!csum)
646 break;
647 /* fall through */
648 case CHECKSUM_NONE:
649 skb->csum = 0;
650 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700651 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 }
653 offset += 4;
654 }
655 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000656 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 offset += 4;
658 }
659 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000660 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 offset += 4;
662 }
663 }
664
Herbert Xue1a80002008-10-09 12:00:17 -0700665 gre_proto = *(__be16 *)(h + 2);
666
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000667 rcu_read_lock();
stephen hemmingerd2083282012-09-24 18:12:23 +0000668 tunnel = ipgre_tunnel_lookup(skb->dev,
669 iph->saddr, iph->daddr, flags, key,
670 gre_proto);
671 if (tunnel) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000672 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700673
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 secpath_reset(skb);
675
Herbert Xue1a80002008-10-09 12:00:17 -0700676 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 /* WCCP version 1 and 2 protocol decoding.
678 * - Change protocol to IP
679 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
680 */
Herbert Xue1a80002008-10-09 12:00:17 -0700681 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700682 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900683 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 offset += 4;
685 }
686
Timo Teras1d069162007-12-20 00:10:33 -0800687 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300688 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700689 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 skb->pkt_type = PACKET_HOST;
691#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800692 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800694 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000696 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 skb->pkt_type = PACKET_BROADCAST;
698 }
699#endif
700
701 if (((flags&GRE_CSUM) && csum) ||
702 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000703 tunnel->dev->stats.rx_crc_errors++;
704 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 goto drop;
706 }
707 if (tunnel->parms.i_flags&GRE_SEQ) {
708 if (!(flags&GRE_SEQ) ||
709 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000710 tunnel->dev->stats.rx_fifo_errors++;
711 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 goto drop;
713 }
714 tunnel->i_seqno = seqno + 1;
715 }
Herbert Xue1a80002008-10-09 12:00:17 -0700716
717 /* Warning: All skb pointers will be invalidated! */
718 if (tunnel->dev->type == ARPHRD_ETHER) {
719 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000720 tunnel->dev->stats.rx_length_errors++;
721 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700722 goto drop;
723 }
724
725 iph = ip_hdr(skb);
726 skb->protocol = eth_type_trans(skb, tunnel->dev);
727 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
728 }
729
Eric Dumazete985aad2010-09-27 03:57:11 +0000730 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000731 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000732 tstats->rx_packets++;
733 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000734 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000735
736 __skb_tunnel_rx(skb, tunnel->dev);
Herbert Xue1a80002008-10-09 12:00:17 -0700737
738 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700740
Eric Dumazetcaf586e2010-09-30 21:06:55 +0000741 netif_rx(skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000742
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000743 rcu_read_unlock();
Eric Dumazet8990f462010-09-20 00:12:11 +0000744 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700746 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
748drop:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000749 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750drop_nolock:
751 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000752 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753}
754
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000755static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756{
Patrick McHardy2941a482006-01-08 22:05:26 -0800757 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazete985aad2010-09-27 03:57:11 +0000758 struct pcpu_tstats *tstats;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000759 const struct iphdr *old_iph = ip_hdr(skb);
760 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700761 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800763 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000765 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700767 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800769 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 int mtu;
771
Eric Dumazet6b78f162012-09-13 21:25:33 +0000772 if (skb->ip_summed == CHECKSUM_PARTIAL &&
773 skb_checksum_help(skb))
774 goto tx_error;
775
Herbert Xue1a80002008-10-09 12:00:17 -0700776 if (dev->type == ARPHRD_ETHER)
777 IPCB(skb)->flags = 0;
778
779 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000781 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 } else {
783 gre_hlen = tunnel->hlen;
784 tiph = &tunnel->parms.iph;
785 }
786
787 if ((dst = tiph->daddr) == 0) {
788 /* NBMA tunnel */
789
Eric Dumazetadf30902009-06-02 05:19:30 +0000790 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000791 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 goto tx_error;
793 }
794
David S. Miller61d57f82012-01-24 18:23:30 -0500795 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000796 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700797 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500798 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000799#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000801 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800802 struct neighbour *neigh;
803 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
David S. Miller0ec88662012-01-27 15:01:08 -0800806 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 if (neigh == NULL)
808 goto tx_error;
809
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000810 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 addr_type = ipv6_addr_type(addr6);
812
813 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700814 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 addr_type = ipv6_addr_type(addr6);
816 }
817
818 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800819 do_tx_error_icmp = true;
820 else {
821 do_tx_error_icmp = false;
822 dst = addr6->s6_addr32[3];
823 }
824 neigh_release(neigh);
825 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 }
828#endif
829 else
830 goto tx_error;
831 }
832
833 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700834 if (tos == 1) {
835 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 if (skb->protocol == htons(ETH_P_IP))
837 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700838 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000839 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 }
841
David S. Millercbb1e852011-05-04 12:33:34 -0700842 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500843 tunnel->parms.o_key, RT_TOS(tos),
844 tunnel->parms.link);
845 if (IS_ERR(rt)) {
846 dev->stats.tx_carrier_errors++;
847 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700849 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
851 if (tdev == dev) {
852 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000853 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 goto tx_error;
855 }
856
857 df = tiph->frag_off;
858 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700859 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000861 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862
Eric Dumazetadf30902009-06-02 05:19:30 +0000863 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700864 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
866 if (skb->protocol == htons(ETH_P_IP)) {
867 df |= (old_iph->frag_off&htons(IP_DF));
868
869 if ((old_iph->frag_off&htons(IP_DF)) &&
870 mtu < ntohs(old_iph->tot_len)) {
871 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
872 ip_rt_put(rt);
873 goto tx_error;
874 }
875 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000876#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000878 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879
Eric Dumazetadf30902009-06-02 05:19:30 +0000880 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800881 if ((tunnel->parms.iph.daddr &&
882 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 rt6->rt6i_dst.plen == 128) {
884 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800885 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 }
887 }
888
889 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000890 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891 ip_rt_put(rt);
892 goto tx_error;
893 }
894 }
895#endif
896
897 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800898 if (time_before(jiffies,
899 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 tunnel->err_count--;
901
902 dst_link_failure(skb);
903 } else
904 tunnel->err_count = 0;
905 }
906
Changli Gaod8d1f302010-06-10 23:31:35 -0700907 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908
Patrick McHardycfbba492007-07-09 15:33:40 -0700909 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
910 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000912 if (max_headroom > dev->needed_headroom)
913 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914 if (!new_skb) {
915 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000916 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000918 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 }
920 if (skb->sk)
921 skb_set_owner_w(new_skb, skb->sk);
922 dev_kfree_skb(skb);
923 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700924 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 }
926
Herbert Xu64194c32008-10-09 12:03:17 -0700927 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700928 skb_push(skb, gre_hlen);
929 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800931 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
932 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000933 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700934 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
936 /*
937 * Push down and install the IPIP header.
938 */
939
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700940 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 iph->version = 4;
942 iph->ihl = sizeof(struct iphdr) >> 2;
943 iph->frag_off = df;
944 iph->protocol = IPPROTO_GRE;
945 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700946 iph->daddr = fl4.daddr;
947 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
949 if ((iph->ttl = tiph->ttl) == 0) {
950 if (skb->protocol == htons(ETH_P_IP))
951 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000952#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000954 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955#endif
956 else
David S. Miller323e1262010-12-12 21:55:08 -0800957 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 }
959
Herbert Xue1a80002008-10-09 12:00:17 -0700960 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
961 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
962 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
964 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000965 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966
967 if (tunnel->parms.o_flags&GRE_SEQ) {
968 ++tunnel->o_seqno;
969 *ptr = htonl(tunnel->o_seqno);
970 ptr--;
971 }
972 if (tunnel->parms.o_flags&GRE_KEY) {
973 *ptr = tunnel->parms.o_key;
974 ptr--;
975 }
976 if (tunnel->parms.o_flags&GRE_CSUM) {
977 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000978 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 }
980 }
981
982 nf_reset(skb);
Eric Dumazete985aad2010-09-27 03:57:11 +0000983 tstats = this_cpu_ptr(dev->tstats);
984 __IPTUNNEL_XMIT(tstats, &dev->stats);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000985 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986
David S. Miller496053f2012-01-11 16:46:32 -0800987#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988tx_error_icmp:
989 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800990#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000992 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000994 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995}
996
Herbert Xu42aa9162008-10-09 11:59:32 -0700997static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800998{
999 struct net_device *tdev = NULL;
1000 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001001 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001002 int hlen = LL_MAX_HEADER;
1003 int mtu = ETH_DATA_LEN;
1004 int addend = sizeof(struct iphdr) + 4;
1005
1006 tunnel = netdev_priv(dev);
1007 iph = &tunnel->parms.iph;
1008
Herbert Xuc95b8192008-10-09 11:58:54 -07001009 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001010
1011 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -07001012 struct flowi4 fl4;
1013 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001014
David S. Millercbb1e852011-05-04 12:33:34 -07001015 rt = ip_route_output_gre(dev_net(dev), &fl4,
1016 iph->daddr, iph->saddr,
1017 tunnel->parms.o_key,
1018 RT_TOS(iph->tos),
1019 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001020 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001021 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001022 ip_rt_put(rt);
1023 }
Herbert Xue1a80002008-10-09 12:00:17 -07001024
1025 if (dev->type != ARPHRD_ETHER)
1026 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001027 }
1028
1029 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001030 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001031
1032 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001033 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001034 mtu = tdev->mtu;
1035 }
1036 dev->iflink = tunnel->parms.link;
1037
1038 /* Precalculate GRE options length */
1039 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1040 if (tunnel->parms.o_flags&GRE_CSUM)
1041 addend += 4;
1042 if (tunnel->parms.o_flags&GRE_KEY)
1043 addend += 4;
1044 if (tunnel->parms.o_flags&GRE_SEQ)
1045 addend += 4;
1046 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001047 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001048 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001049
1050 if (mtu < 68)
1051 mtu = 68;
1052
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001053 tunnel->hlen = addend;
1054
Herbert Xu42aa9162008-10-09 11:59:32 -07001055 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001056}
1057
/*
 * ipgre_tunnel_ioctl - SIOC{GET,ADD,CHG,DEL}TUNNEL handler for GRE devices.
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct ip_tunnel_parm.
 * When the ioctl is issued on the per-namespace fallback device
 * (ign->fb_tunnel_dev) the tunnel to act on is looked up from those
 * parameters; otherwise the device's own tunnel is used.
 *
 * ADD/CHG/DEL require CAP_NET_ADMIN.  Returns 0 on success or a negative
 * errno; any other command yields -EINVAL.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058static int
1059ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1060{
1061 int err = 0;
1062 struct ip_tunnel_parm p;
1063 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001064 struct net *net = dev_net(dev);
1065 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066
1067 switch (cmd) {
1068 case SIOCGETTUNNEL:
/* Report tunnel parameters; fall back to dev's own tunnel if the lookup finds nothing. */
1069 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001070 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1072 err = -EFAULT;
1073 break;
1074 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001075 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 }
1077 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001078 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 memcpy(&p, &t->parms, sizeof(p));
1080 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1081 err = -EFAULT;
1082 break;
1083
1084 case SIOCADDTUNNEL:
1085 case SIOCCHGTUNNEL:
1086 err = -EPERM;
1087 if (!capable(CAP_NET_ADMIN))
1088 goto done;
1089
1090 err = -EFAULT;
1091 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1092 goto done;
1093
/* Only a plain 20-byte IPv4/GRE outer header is accepted; GRE version and routing bits must be clear. */
1094 err = -EINVAL;
1095 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1096 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1097 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1098 goto done;
/* A fixed ttl forces the DF bit on the outer header. */
1099 if (p.iph.ttl)
1100 p.iph.frag_off |= htons(IP_DF);
1101
/* Keys are ignored (zeroed) unless the matching GRE_KEY flag is set. */
1102 if (!(p.i_flags&GRE_KEY))
1103 p.i_key = 0;
1104 if (!(p.o_flags&GRE_KEY))
1105 p.o_key = 0;
1106
/* Third argument asks ipgre_tunnel_locate() to create the tunnel for SIOCADDTUNNEL only. */
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001107 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001109 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 if (t != NULL) {
/* The requested parameters already belong to a different device. */
1111 if (t->dev != dev) {
1112 err = -EEXIST;
1113 break;
1114 }
1115 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001116 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117
Patrick McHardy2941a482006-01-08 22:05:26 -08001118 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119
Joe Perchesf97c1e02007-12-16 13:45:43 -08001120 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 nflags = IFF_BROADCAST;
1122 else if (p.iph.daddr)
1123 nflags = IFF_POINTOPOINT;
1124
/* Reject a change that would alter the device's POINTOPOINT/BROADCAST flags. */
1125 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1126 err = -EINVAL;
1127 break;
1128 }
/* Unlink the tunnel, update addresses and keys, then link it back in. */
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001129 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001130 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 t->parms.iph.saddr = p.iph.saddr;
1132 t->parms.iph.daddr = p.iph.daddr;
1133 t->parms.i_key = p.i_key;
1134 t->parms.o_key = p.o_key;
1135 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1136 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001137 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 netdev_state_change(dev);
1139 }
1140 }
1141
1142 if (t) {
1143 err = 0;
1144 if (cmd == SIOCCHGTUNNEL) {
1145 t->parms.iph.ttl = p.iph.ttl;
1146 t->parms.iph.tos = p.iph.tos;
1147 t->parms.iph.frag_off = p.iph.frag_off;
/* A new underlying link means headroom and MTU must be recomputed. */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001148 if (t->parms.link != p.link) {
1149 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001150 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001151 netdev_state_change(dev);
1152 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 }
1154 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1155 err = -EFAULT;
1156 } else
1157 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1158 break;
1159
1160 case SIOCDELTUNNEL:
1161 err = -EPERM;
1162 if (!capable(CAP_NET_ADMIN))
1163 goto done;
1164
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001165 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 err = -EFAULT;
1167 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1168 goto done;
1169 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001170 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 goto done;
/* The fallback device itself cannot be deleted through this ioctl. */
1172 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001173 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 goto done;
1175 dev = t->dev;
1176 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001177 unregister_netdevice(dev);
1178 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 break;
1180
1181 default:
1182 err = -EINVAL;
1183 }
1184
1185done:
1186 return err;
1187}
1188
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1190{
Patrick McHardy2941a482006-01-08 22:05:26 -08001191 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001192 if (new_mtu < 68 ||
1193 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 return -EINVAL;
1195 dev->mtu = new_mtu;
1196 return 0;
1197}
1198
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199/* Nice toy. Unfortunately, useless in real life :-)
1200 It allows to construct virtual multiprotocol broadcast "LAN"
1201 over the Internet, provided multicast routing is tuned.
1202
1203
1204 I have no idea was this bicycle invented before me,
1205 so that I had to set ARPHRD_IPGRE to a random value.
1206 I have an impression, that Cisco could make something similar,
1207 but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001208
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1210 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1211
1212 ping -t 255 224.66.66.66
1213
1214 If nobody answers, mbone does not work.
1215
1216 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1217 ip addr add 10.66.66.<somewhat>/24 dev Universe
1218 ifconfig Universe up
1219 ifconfig Universe add fe80::<Your_real_addr>/10
1220 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1221 ftp 10.66.66.66
1222 ...
1223 ftp fec0:6666:6666::193.233.7.65
1224 ...
1225
1226 */
1227
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001228static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1229 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001230 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231{
Patrick McHardy2941a482006-01-08 22:05:26 -08001232 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001234 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
1236 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1237 p[0] = t->parms.o_flags;
1238 p[1] = htons(type);
1239
1240 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001241 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001243
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 if (saddr)
1245 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001246 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001248 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001250
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 return -t->hlen;
1252}
1253
Timo Teras6a5f44d2007-10-23 20:31:53 -07001254static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1255{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001256 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001257 memcpy(haddr, &iph->saddr, 4);
1258 return 4;
1259}
1260
/* Link-layer header callbacks: ipgre_header builds the header, ipgre_header_parse extracts the source address. */
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001261static const struct header_ops ipgre_header_ops = {
1262 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001263 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001264};
1265
Timo Teras6a5f44d2007-10-23 20:31:53 -07001266#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267static int ipgre_open(struct net_device *dev)
1268{
Patrick McHardy2941a482006-01-08 22:05:26 -08001269 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Joe Perchesf97c1e02007-12-16 13:45:43 -08001271 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001272 struct flowi4 fl4;
1273 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001274
David S. Millercbb1e852011-05-04 12:33:34 -07001275 rt = ip_route_output_gre(dev_net(dev), &fl4,
1276 t->parms.iph.daddr,
1277 t->parms.iph.saddr,
1278 t->parms.o_key,
1279 RT_TOS(t->parms.iph.tos),
1280 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001281 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001283 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001285 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 return -EADDRNOTAVAIL;
1287 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001288 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 }
1290 return 0;
1291}
1292
1293static int ipgre_close(struct net_device *dev)
1294{
Patrick McHardy2941a482006-01-08 22:05:26 -08001295 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001296
Joe Perchesf97c1e02007-12-16 13:45:43 -08001297 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001298 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001299 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001300 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 }
1303 return 0;
1304}
1305
1306#endif
1307
/*
 * net_device operations for GRE tunnel devices.  The open/stop hooks are
 * only wired up when CONFIG_NET_IPGRE_BROADCAST is enabled.
 */
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001308static const struct net_device_ops ipgre_netdev_ops = {
1309 .ndo_init = ipgre_tunnel_init,
1310 .ndo_uninit = ipgre_tunnel_uninit,
1311#ifdef CONFIG_NET_IPGRE_BROADCAST
1312 .ndo_open = ipgre_open,
1313 .ndo_stop = ipgre_close,
1314#endif
1315 .ndo_start_xmit = ipgre_tunnel_xmit,
1316 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1317 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001318 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001319};
1320
Eric Dumazete985aad2010-09-27 03:57:11 +00001321static void ipgre_dev_free(struct net_device *dev)
1322{
1323 free_percpu(dev->tstats);
1324 free_netdev(dev);
1325}
1326
Eric Dumazet6b78f162012-09-13 21:25:33 +00001327#define GRE_FEATURES (NETIF_F_SG | \
1328 NETIF_F_FRAGLIST | \
1329 NETIF_F_HIGHDMA | \
1330 NETIF_F_HW_CSUM)
1331
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332static void ipgre_tunnel_setup(struct net_device *dev)
1333{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001334 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001335 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001338 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001339 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 dev->flags = IFF_NOARP;
1341 dev->iflink = 0;
1342 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001343 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001344 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001345
1346 dev->features |= GRE_FEATURES;
1347 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348}
1349
1350static int ipgre_tunnel_init(struct net_device *dev)
1351{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 struct ip_tunnel *tunnel;
1353 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
Patrick McHardy2941a482006-01-08 22:05:26 -08001355 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 iph = &tunnel->parms.iph;
1357
1358 tunnel->dev = dev;
1359 strcpy(tunnel->parms.name, dev->name);
1360
1361 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1362 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1363
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001366 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 if (!iph->saddr)
1368 return -EINVAL;
1369 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001370 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 }
1372#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001373 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001374 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
Eric Dumazete985aad2010-09-27 03:57:11 +00001376 dev->tstats = alloc_percpu(struct pcpu_tstats);
1377 if (!dev->tstats)
1378 return -ENOMEM;
1379
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 return 0;
1381}
1382
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001383static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384{
Patrick McHardy2941a482006-01-08 22:05:26 -08001385 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 struct iphdr *iph = &tunnel->parms.iph;
1387
1388 tunnel->dev = dev;
1389 strcpy(tunnel->parms.name, dev->name);
1390
1391 iph->version = 4;
1392 iph->protocol = IPPROTO_GRE;
1393 iph->ihl = 5;
1394 tunnel->hlen = sizeof(struct iphdr) + 4;
1395
1396 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397}
1398
1399
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001400static const struct gre_protocol ipgre_protocol = {
1401 .handler = ipgre_rcv,
1402 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403};
1404
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001405static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001406{
1407 int prio;
1408
1409 for (prio = 0; prio < 4; prio++) {
1410 int h;
1411 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001412 struct ip_tunnel *t;
1413
1414 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001415
1416 while (t != NULL) {
1417 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001418 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001419 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001420 }
1421 }
1422}
1423
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001424static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001425{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001426 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001427 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001428
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001429 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1430 ipgre_tunnel_setup);
1431 if (!ign->fb_tunnel_dev) {
1432 err = -ENOMEM;
1433 goto err_alloc_dev;
1434 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001435 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001436
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001437 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001438 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001439
1440 if ((err = register_netdev(ign->fb_tunnel_dev)))
1441 goto err_reg_dev;
1442
Eric Dumazet3285ee32010-10-30 16:21:28 -07001443 rcu_assign_pointer(ign->tunnels_wc[0],
1444 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001445 return 0;
1446
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001447err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001448 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001449err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001450 return err;
1451}
1452
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001453static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001454{
1455 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001456 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001457
1458 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001459 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001460 ipgre_destroy_tunnels(ign, &list);
1461 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001462 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001463}
1464
1465static struct pernet_operations ipgre_net_ops = {
1466 .init = ipgre_init_net,
1467 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001468 .id = &ipgre_net_id,
1469 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001470};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471
Herbert Xuc19e6542008-10-09 11:59:55 -07001472static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1473{
1474 __be16 flags;
1475
1476 if (!data)
1477 return 0;
1478
1479 flags = 0;
1480 if (data[IFLA_GRE_IFLAGS])
1481 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1482 if (data[IFLA_GRE_OFLAGS])
1483 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1484 if (flags & (GRE_VERSION|GRE_ROUTING))
1485 return -EINVAL;
1486
1487 return 0;
1488}
1489
Herbert Xue1a80002008-10-09 12:00:17 -07001490static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1491{
1492 __be32 daddr;
1493
1494 if (tb[IFLA_ADDRESS]) {
1495 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1496 return -EINVAL;
1497 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1498 return -EADDRNOTAVAIL;
1499 }
1500
1501 if (!data)
1502 goto out;
1503
1504 if (data[IFLA_GRE_REMOTE]) {
1505 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1506 if (!daddr)
1507 return -EINVAL;
1508 }
1509
1510out:
1511 return ipgre_tunnel_validate(tb, data);
1512}
1513
Herbert Xuc19e6542008-10-09 11:59:55 -07001514static void ipgre_netlink_parms(struct nlattr *data[],
1515 struct ip_tunnel_parm *parms)
1516{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001517 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001518
1519 parms->iph.protocol = IPPROTO_GRE;
1520
1521 if (!data)
1522 return;
1523
1524 if (data[IFLA_GRE_LINK])
1525 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1526
1527 if (data[IFLA_GRE_IFLAGS])
1528 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1529
1530 if (data[IFLA_GRE_OFLAGS])
1531 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1532
1533 if (data[IFLA_GRE_IKEY])
1534 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1535
1536 if (data[IFLA_GRE_OKEY])
1537 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1538
1539 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001540 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001541
1542 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001543 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001544
1545 if (data[IFLA_GRE_TTL])
1546 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1547
1548 if (data[IFLA_GRE_TOS])
1549 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1550
1551 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1552 parms->iph.frag_off = htons(IP_DF);
1553}
1554
Herbert Xue1a80002008-10-09 12:00:17 -07001555static int ipgre_tap_init(struct net_device *dev)
1556{
1557 struct ip_tunnel *tunnel;
1558
1559 tunnel = netdev_priv(dev);
1560
1561 tunnel->dev = dev;
1562 strcpy(tunnel->parms.name, dev->name);
1563
1564 ipgre_tunnel_bind_dev(dev);
1565
Eric Dumazete985aad2010-09-27 03:57:11 +00001566 dev->tstats = alloc_percpu(struct pcpu_tstats);
1567 if (!dev->tstats)
1568 return -ENOMEM;
1569
Herbert Xue1a80002008-10-09 12:00:17 -07001570 return 0;
1571}
1572
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001573static const struct net_device_ops ipgre_tap_netdev_ops = {
1574 .ndo_init = ipgre_tap_init,
1575 .ndo_uninit = ipgre_tunnel_uninit,
1576 .ndo_start_xmit = ipgre_tunnel_xmit,
1577 .ndo_set_mac_address = eth_mac_addr,
1578 .ndo_validate_addr = eth_validate_addr,
1579 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001580 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001581};
1582
Herbert Xue1a80002008-10-09 12:00:17 -07001583static void ipgre_tap_setup(struct net_device *dev)
1584{
1585
1586 ether_setup(dev);
1587
Herbert Xu2e9526b2009-10-30 05:51:48 +00001588 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001589 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001590
1591 dev->iflink = 0;
1592 dev->features |= NETIF_F_NETNS_LOCAL;
1593}
1594
Eric W. Biederman81adee42009-11-08 00:53:51 -08001595static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001596 struct nlattr *data[])
1597{
1598 struct ip_tunnel *nt;
1599 struct net *net = dev_net(dev);
1600 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1601 int mtu;
1602 int err;
1603
1604 nt = netdev_priv(dev);
1605 ipgre_netlink_parms(data, &nt->parms);
1606
Herbert Xue1a80002008-10-09 12:00:17 -07001607 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001608 return -EEXIST;
1609
Herbert Xue1a80002008-10-09 12:00:17 -07001610 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001611 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001612
Herbert Xuc19e6542008-10-09 11:59:55 -07001613 mtu = ipgre_tunnel_bind_dev(dev);
1614 if (!tb[IFLA_MTU])
1615 dev->mtu = mtu;
1616
Eric Dumazetb790e012010-09-27 23:05:47 +00001617 /* Can use a lockless transmit, unless we generate output sequences */
1618 if (!(nt->parms.o_flags & GRE_SEQ))
1619 dev->features |= NETIF_F_LLTX;
1620
Herbert Xuc19e6542008-10-09 11:59:55 -07001621 err = register_netdevice(dev);
1622 if (err)
1623 goto out;
1624
1625 dev_hold(dev);
1626 ipgre_tunnel_link(ign, nt);
1627
1628out:
1629 return err;
1630}
1631
1632static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1633 struct nlattr *data[])
1634{
1635 struct ip_tunnel *t, *nt;
1636 struct net *net = dev_net(dev);
1637 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1638 struct ip_tunnel_parm p;
1639 int mtu;
1640
1641 if (dev == ign->fb_tunnel_dev)
1642 return -EINVAL;
1643
1644 nt = netdev_priv(dev);
1645 ipgre_netlink_parms(data, &p);
1646
1647 t = ipgre_tunnel_locate(net, &p, 0);
1648
1649 if (t) {
1650 if (t->dev != dev)
1651 return -EEXIST;
1652 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001653 t = nt;
1654
Herbert Xu2e9526b2009-10-30 05:51:48 +00001655 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001656 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001657
Herbert Xu2e9526b2009-10-30 05:51:48 +00001658 if (ipv4_is_multicast(p.iph.daddr))
1659 nflags = IFF_BROADCAST;
1660 else if (p.iph.daddr)
1661 nflags = IFF_POINTOPOINT;
1662
1663 if ((dev->flags ^ nflags) &
1664 (IFF_POINTOPOINT | IFF_BROADCAST))
1665 return -EINVAL;
1666 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001667
1668 ipgre_tunnel_unlink(ign, t);
1669 t->parms.iph.saddr = p.iph.saddr;
1670 t->parms.iph.daddr = p.iph.daddr;
1671 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001672 if (dev->type != ARPHRD_ETHER) {
1673 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1674 memcpy(dev->broadcast, &p.iph.daddr, 4);
1675 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001676 ipgre_tunnel_link(ign, t);
1677 netdev_state_change(dev);
1678 }
1679
1680 t->parms.o_key = p.o_key;
1681 t->parms.iph.ttl = p.iph.ttl;
1682 t->parms.iph.tos = p.iph.tos;
1683 t->parms.iph.frag_off = p.iph.frag_off;
1684
1685 if (t->parms.link != p.link) {
1686 t->parms.link = p.link;
1687 mtu = ipgre_tunnel_bind_dev(dev);
1688 if (!tb[IFLA_MTU])
1689 dev->mtu = mtu;
1690 netdev_state_change(dev);
1691 }
1692
1693 return 0;
1694}
1695
1696static size_t ipgre_get_size(const struct net_device *dev)
1697{
1698 return
1699 /* IFLA_GRE_LINK */
1700 nla_total_size(4) +
1701 /* IFLA_GRE_IFLAGS */
1702 nla_total_size(2) +
1703 /* IFLA_GRE_OFLAGS */
1704 nla_total_size(2) +
1705 /* IFLA_GRE_IKEY */
1706 nla_total_size(4) +
1707 /* IFLA_GRE_OKEY */
1708 nla_total_size(4) +
1709 /* IFLA_GRE_LOCAL */
1710 nla_total_size(4) +
1711 /* IFLA_GRE_REMOTE */
1712 nla_total_size(4) +
1713 /* IFLA_GRE_TTL */
1714 nla_total_size(1) +
1715 /* IFLA_GRE_TOS */
1716 nla_total_size(1) +
1717 /* IFLA_GRE_PMTUDISC */
1718 nla_total_size(1) +
1719 0;
1720}
1721
1722static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1723{
1724 struct ip_tunnel *t = netdev_priv(dev);
1725 struct ip_tunnel_parm *p = &t->parms;
1726
David S. Millerf3756b72012-04-01 20:39:02 -04001727 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1728 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1729 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1730 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1731 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1732 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1733 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1734 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1735 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1736 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1737 !!(p->iph.frag_off & htons(IP_DF))))
1738 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001739 return 0;
1740
1741nla_put_failure:
1742 return -EMSGSIZE;
1743}
1744
1745static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1746 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1747 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1748 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1749 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1750 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001751 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1752 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001753 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1754 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1755 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1756};
1757
1758static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1759 .kind = "gre",
1760 .maxtype = IFLA_GRE_MAX,
1761 .policy = ipgre_policy,
1762 .priv_size = sizeof(struct ip_tunnel),
1763 .setup = ipgre_tunnel_setup,
1764 .validate = ipgre_tunnel_validate,
1765 .newlink = ipgre_newlink,
1766 .changelink = ipgre_changelink,
1767 .get_size = ipgre_get_size,
1768 .fill_info = ipgre_fill_info,
1769};
1770
Herbert Xue1a80002008-10-09 12:00:17 -07001771static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1772 .kind = "gretap",
1773 .maxtype = IFLA_GRE_MAX,
1774 .policy = ipgre_policy,
1775 .priv_size = sizeof(struct ip_tunnel),
1776 .setup = ipgre_tap_setup,
1777 .validate = ipgre_tap_validate,
1778 .newlink = ipgre_newlink,
1779 .changelink = ipgre_changelink,
1780 .get_size = ipgre_get_size,
1781 .fill_info = ipgre_fill_info,
1782};
1783
/*
 *	And now the module code and kernel interface.
 */
1787
1788static int __init ipgre_init(void)
1789{
1790 int err;
1791
Joe Perches058bd4d2012-03-11 18:36:11 +00001792 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001794 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001795 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001796 return err;
1797
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001798 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001799 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001800 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001801 goto add_proto_failed;
1802 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001803
Herbert Xuc19e6542008-10-09 11:59:55 -07001804 err = rtnl_link_register(&ipgre_link_ops);
1805 if (err < 0)
1806 goto rtnl_link_failed;
1807
Herbert Xue1a80002008-10-09 12:00:17 -07001808 err = rtnl_link_register(&ipgre_tap_ops);
1809 if (err < 0)
1810 goto tap_ops_failed;
1811
Herbert Xuc19e6542008-10-09 11:59:55 -07001812out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001814
Herbert Xue1a80002008-10-09 12:00:17 -07001815tap_ops_failed:
1816 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001817rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001818 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001819add_proto_failed:
1820 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001821 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822}
1823
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001824static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825{
Herbert Xue1a80002008-10-09 12:00:17 -07001826 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001827 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001828 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001829 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001830 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831}
1832
1833module_init(ipgre_init);
1834module_exit(ipgre_fini);
1835MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001836MODULE_ALIAS_RTNL_LINK("gre");
1837MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9a2011-03-02 00:33:13 +03001838MODULE_ALIAS_NETDEV("gre0");