blob: 1c012cb2cb941ad09132f0eceb18de7dd24737eb [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet6d0722a2010-09-29 23:35:10 -070069 and silently drop packet when it expires. It is a good
stephen hemmingerbff52852012-02-24 08:08:20 +000070 solution, but it supposes maintaining new variable in ALL
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 skb, even if no tunneling is used.
72
Eric Dumazet6d0722a2010-09-29 23:35:10 -070073 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
stephen hemmingerbff52852012-02-24 08:08:20 +0000103 rapidly degrades to value <68, where looping stops.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
Herbert Xuc19e6542008-10-09 11:59:55 -0700123static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ipgre_tunnel_init(struct net_device *dev);
125static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700126static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
128/* Fallback tunnel: no source, no destination, no key, no options */
129
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130#define HASH_SIZE 16
131
Eric Dumazetf99189b2009-11-17 10:42:49 +0000132static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700136 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137};
138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139/* Tunnel hash table */
140
141/*
142 4 hash tables:
143
144 3: (remote,local)
145 2: (remote,*)
146 1: (*,local)
147 0: (*,*)
148
149 We require exact key match i.e. if a key is present in packet
150 it will match only tunnel with the same key; if it is not present,
151 it will match only keyless tunnel.
152
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
155 */
156
/*
 * Fold an address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that an expression (and not only
 * a plain identifier) can be hashed: without the parentheses the cast and
 * the shift would bind to only part of the expression.  Note that @addr is
 * still evaluated twice, so it must not have side effects.
 *
 * The mask is derived from HASH_SIZE (which must be a power of two) so the
 * result always stays inside the tunnels[][HASH_SIZE] arrays.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Shorthand names for the four hash tables in struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote, local) */
#define tunnels_r	tunnels[2]	/* (remote, *)     */
#define tunnels_l	tunnels[1]	/* (*, local)      */
#define tunnels_wc	tunnels[0]	/* (*, *)          */
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000163/*
Eric Dumazet15078502010-09-15 11:07:53 +0000164 * Locking : hash tables are protected by RCU and RTNL
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000165 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
/*
 * Walk one hash chain under RCU, starting at the bucket head @start.
 * The cursor is NOT a macro argument: the caller must have a local
 * "struct ip_tunnel *t" in scope, which the macro assigns on every step.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Eric Dumazete985aad2010-09-27 03:57:11 +0000170/* often modified stats are per cpu, other are shared (netdev->stats) */
171struct pcpu_tstats {
stephen hemminger87b6d212012-04-12 06:31:16 +0000172 u64 rx_packets;
173 u64 rx_bytes;
174 u64 tx_packets;
175 u64 tx_bytes;
176 struct u64_stats_sync syncp;
177};
Eric Dumazete985aad2010-09-27 03:57:11 +0000178
stephen hemminger87b6d212012-04-12 06:31:16 +0000179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000181{
Eric Dumazete985aad2010-09-27 03:57:11 +0000182 int i;
183
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
187 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000188
stephen hemminger87b6d212012-04-12 06:31:16 +0000189 do {
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
196
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000201 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000202
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
213
214 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000215}
216
stephen hemmingerd2083282012-09-24 18:12:23 +0000217/* Does key in tunnel parameters match packet */
218static bool ipgre_key_match(const struct ip_tunnel_parm *p,
219 __u32 flags, __be32 key)
220{
221 if (p->i_flags & GRE_KEY) {
222 if (flags & GRE_KEY)
223 return key == p->i_key;
224 else
225 return false; /* key expected, none present */
226 } else
227 return !(flags & GRE_KEY);
228}
229
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230/* Given src, dst and key, find appropriate for input tunnel. */
231
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000232static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
233 __be32 remote, __be32 local,
stephen hemmingerd2083282012-09-24 18:12:23 +0000234 __u32 flags, __be32 key,
235 __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236{
Timo Teras749c10f2009-01-19 17:22:12 -0800237 struct net *net = dev_net(dev);
238 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000239 unsigned int h0 = HASH(remote);
240 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800241 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700242 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700243 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
244 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800245 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000247 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800248 if (local != t->parms.iph.saddr ||
249 remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800250 !(t->dev->flags & IFF_UP))
251 continue;
252
stephen hemmingerd2083282012-09-24 18:12:23 +0000253 if (!ipgre_key_match(&t->parms, flags, key))
254 continue;
255
Timo Teras749c10f2009-01-19 17:22:12 -0800256 if (t->dev->type != ARPHRD_IPGRE &&
257 t->dev->type != dev_type)
258 continue;
259
Timo Terasafcf1242009-01-26 20:56:10 -0800260 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800261 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800262 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800263 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800264 score |= 2;
265 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800266 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800267
268 if (score < cand_score) {
269 cand = t;
270 cand_score = score;
271 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
Herbert Xue1a80002008-10-09 12:00:17 -0700273
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000274 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800275 if (remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800276 !(t->dev->flags & IFF_UP))
277 continue;
278
stephen hemmingerd2083282012-09-24 18:12:23 +0000279 if (!ipgre_key_match(&t->parms, flags, key))
280 continue;
281
Timo Teras749c10f2009-01-19 17:22:12 -0800282 if (t->dev->type != ARPHRD_IPGRE &&
283 t->dev->type != dev_type)
284 continue;
285
Timo Terasafcf1242009-01-26 20:56:10 -0800286 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800287 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800288 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800289 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800290 score |= 2;
291 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800292 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800293
294 if (score < cand_score) {
295 cand = t;
296 cand_score = score;
297 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 }
Herbert Xue1a80002008-10-09 12:00:17 -0700299
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000300 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800301 if ((local != t->parms.iph.saddr &&
302 (local != t->parms.iph.daddr ||
303 !ipv4_is_multicast(local))) ||
Timo Teras749c10f2009-01-19 17:22:12 -0800304 !(t->dev->flags & IFF_UP))
305 continue;
306
stephen hemmingerd2083282012-09-24 18:12:23 +0000307 if (!ipgre_key_match(&t->parms, flags, key))
308 continue;
309
Timo Teras749c10f2009-01-19 17:22:12 -0800310 if (t->dev->type != ARPHRD_IPGRE &&
311 t->dev->type != dev_type)
312 continue;
313
Timo Terasafcf1242009-01-26 20:56:10 -0800314 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800315 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800316 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800317 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800318 score |= 2;
319 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800320 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800321
322 if (score < cand_score) {
323 cand = t;
324 cand_score = score;
325 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 }
Herbert Xue1a80002008-10-09 12:00:17 -0700327
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000328 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800329 if (t->parms.i_key != key ||
330 !(t->dev->flags & IFF_UP))
331 continue;
332
333 if (t->dev->type != ARPHRD_IPGRE &&
334 t->dev->type != dev_type)
335 continue;
336
Timo Terasafcf1242009-01-26 20:56:10 -0800337 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800338 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800339 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800340 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800341 score |= 2;
342 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800343 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800344
345 if (score < cand_score) {
346 cand = t;
347 cand_score = score;
348 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 }
350
Timo Terasafcf1242009-01-26 20:56:10 -0800351 if (cand != NULL)
352 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700353
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000354 dev = ign->fb_tunnel_dev;
355 if (dev->flags & IFF_UP)
356 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800357
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 return NULL;
359}
360
Eric Dumazet15078502010-09-15 11:07:53 +0000361static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700362 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900364 __be32 remote = parms->iph.daddr;
365 __be32 local = parms->iph.saddr;
366 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000367 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 int prio = 0;
369
370 if (local)
371 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800372 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 prio |= 2;
374 h ^= HASH(remote);
375 }
376
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700377 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378}
379
Eric Dumazet15078502010-09-15 11:07:53 +0000380static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700381 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900382{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700383 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900384}
385
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700386static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387{
Eric Dumazet15078502010-09-15 11:07:53 +0000388 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
Eric Dumazet15078502010-09-15 11:07:53 +0000390 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000391 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392}
393
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700394static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395{
Eric Dumazet15078502010-09-15 11:07:53 +0000396 struct ip_tunnel __rcu **tp;
397 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
Eric Dumazet15078502010-09-15 11:07:53 +0000399 for (tp = ipgre_bucket(ign, t);
400 (iter = rtnl_dereference(*tp)) != NULL;
401 tp = &iter->next) {
402 if (t == iter) {
403 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 break;
405 }
406 }
407}
408
Herbert Xue1a80002008-10-09 12:00:17 -0700409static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
410 struct ip_tunnel_parm *parms,
411 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412{
Al Virod5a0a1e2006-11-08 00:23:14 -0800413 __be32 remote = parms->iph.daddr;
414 __be32 local = parms->iph.saddr;
415 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800416 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000417 struct ip_tunnel *t;
418 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700419 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
420
Eric Dumazet15078502010-09-15 11:07:53 +0000421 for (tp = __ipgre_bucket(ign, parms);
422 (t = rtnl_dereference(*tp)) != NULL;
423 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700424 if (local == t->parms.iph.saddr &&
425 remote == t->parms.iph.daddr &&
426 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800427 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700428 type == t->dev->type)
429 break;
430
431 return t;
432}
433
Eric Dumazet15078502010-09-15 11:07:53 +0000434static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700435 struct ip_tunnel_parm *parms, int create)
436{
437 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700440 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
Herbert Xue1a80002008-10-09 12:00:17 -0700442 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
443 if (t || !create)
444 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
446 if (parms->name[0])
447 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800448 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000449 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
451 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
452 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000453 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700455 dev_net_set(dev, net);
456
Patrick McHardy2941a482006-01-08 22:05:26 -0800457 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700459 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
Herbert Xu42aa9162008-10-09 11:59:32 -0700461 dev->mtu = ipgre_tunnel_bind_dev(dev);
462
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800463 if (register_netdevice(dev) < 0)
464 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000466 /* Can use a lockless transmit, unless we generate output sequences */
467 if (!(nt->parms.o_flags & GRE_SEQ))
468 dev->features |= NETIF_F_LLTX;
469
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700471 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 return nt;
473
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800474failed_free:
475 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 return NULL;
477}
478
479static void ipgre_tunnel_uninit(struct net_device *dev)
480{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700481 struct net *net = dev_net(dev);
482 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
483
484 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 dev_put(dev);
486}
487
488
489static void ipgre_err(struct sk_buff *skb, u32 info)
490{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491
Rami Rosen071f92d2008-05-21 17:47:54 -0700492/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 8 bytes of packet payload. It means, that precise relaying of
494 ICMP in the real Internet is absolutely infeasible.
495
496 Moreover, Cisco "wise men" put GRE key to the third word
497 in GRE header. It makes impossible maintaining even soft state for keyed
498 GRE tunnels with enabled checksum. Tell them "thank you".
499
500 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000501 what the hell these idiots break standards established
502 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 */
504
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000505 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000506 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300508 const int type = icmp_hdr(skb)->type;
509 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800511 __be16 flags;
stephen hemmingerd2083282012-09-24 18:12:23 +0000512 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513
514 flags = p[0];
515 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
516 if (flags&(GRE_VERSION|GRE_ROUTING))
517 return;
518 if (flags&GRE_KEY) {
519 grehlen += 4;
520 if (flags&GRE_CSUM)
521 grehlen += 4;
522 }
523 }
524
525 /* If only 8 bytes returned, keyed message will be dropped here */
526 if (skb_headlen(skb) < grehlen)
527 return;
528
stephen hemmingerd2083282012-09-24 18:12:23 +0000529 if (flags & GRE_KEY)
530 key = *(((__be32 *)p) + (grehlen / 4) - 1);
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 switch (type) {
533 default:
534 case ICMP_PARAMETERPROB:
535 return;
536
537 case ICMP_DEST_UNREACH:
538 switch (code) {
539 case ICMP_SR_FAILED:
540 case ICMP_PORT_UNREACH:
541 /* Impossible event. */
542 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 default:
544 /* All others are translated to HOST_UNREACH.
545 rfc2003 contains "deep thoughts" about NET_UNREACH,
546 I believe they are just ether pollution. --ANK
547 */
548 break;
549 }
550 break;
551 case ICMP_TIME_EXCEEDED:
552 if (code != ICMP_EXC_TTL)
553 return;
554 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700555
556 case ICMP_REDIRECT:
557 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 }
559
Timo Teras749c10f2009-01-19 17:22:12 -0800560 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
stephen hemmingerd2083282012-09-24 18:12:23 +0000561 flags, key, p[1]);
562
David S. Miller36393392012-06-14 22:21:46 -0700563 if (t == NULL)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000564 return;
David S. Miller36393392012-06-14 22:21:46 -0700565
566 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
567 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
568 t->parms.link, 0, IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000569 return;
David S. Miller36393392012-06-14 22:21:46 -0700570 }
David S. Miller55be7a92012-07-11 21:27:49 -0700571 if (type == ICMP_REDIRECT) {
572 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
573 IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000574 return;
David S. Miller55be7a92012-07-11 21:27:49 -0700575 }
David S. Miller36393392012-06-14 22:21:46 -0700576 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800577 ipv4_is_multicast(t->parms.iph.daddr))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000578 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579
580 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000581 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582
Wei Yongjunda6185d82009-02-24 23:34:48 -0800583 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 t->err_count++;
585 else
586 t->err_count = 1;
587 t->err_time = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588}
589
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000590static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591{
592 if (INET_ECN_is_ce(iph->tos)) {
593 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700594 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700596 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 }
598 }
599}
600
601static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000602ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603{
604 u8 inner = 0;
605 if (skb->protocol == htons(ETH_P_IP))
606 inner = old_iph->tos;
607 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000608 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 return INET_ECN_encapsulate(tos, inner);
610}
611
612static int ipgre_rcv(struct sk_buff *skb)
613{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000614 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800616 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800617 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800618 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 u32 seqno = 0;
620 struct ip_tunnel *tunnel;
621 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700622 __be16 gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
624 if (!pskb_may_pull(skb, 16))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000625 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700627 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000629 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
632 /* - Version must be 0.
633 - We do not support routing headers.
634 */
635 if (flags&(GRE_VERSION|GRE_ROUTING))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000636 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637
638 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800639 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700640 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800641 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800642 if (!csum)
643 break;
644 /* fall through */
645 case CHECKSUM_NONE:
646 skb->csum = 0;
647 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700648 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 }
650 offset += 4;
651 }
652 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000653 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 offset += 4;
655 }
656 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000657 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 offset += 4;
659 }
660 }
661
Herbert Xue1a80002008-10-09 12:00:17 -0700662 gre_proto = *(__be16 *)(h + 2);
663
stephen hemmingerd2083282012-09-24 18:12:23 +0000664 tunnel = ipgre_tunnel_lookup(skb->dev,
665 iph->saddr, iph->daddr, flags, key,
666 gre_proto);
667 if (tunnel) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000668 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700669
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 secpath_reset(skb);
671
Herbert Xue1a80002008-10-09 12:00:17 -0700672 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 /* WCCP version 1 and 2 protocol decoding.
674 * - Change protocol to IP
675 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
676 */
Herbert Xue1a80002008-10-09 12:00:17 -0700677 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700678 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900679 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 offset += 4;
681 }
682
Timo Teras1d069162007-12-20 00:10:33 -0800683 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300684 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700685 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 skb->pkt_type = PACKET_HOST;
687#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800688 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800690 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000692 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 skb->pkt_type = PACKET_BROADCAST;
694 }
695#endif
696
697 if (((flags&GRE_CSUM) && csum) ||
698 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000699 tunnel->dev->stats.rx_crc_errors++;
700 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 goto drop;
702 }
703 if (tunnel->parms.i_flags&GRE_SEQ) {
704 if (!(flags&GRE_SEQ) ||
705 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000706 tunnel->dev->stats.rx_fifo_errors++;
707 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 goto drop;
709 }
710 tunnel->i_seqno = seqno + 1;
711 }
Herbert Xue1a80002008-10-09 12:00:17 -0700712
713 /* Warning: All skb pointers will be invalidated! */
714 if (tunnel->dev->type == ARPHRD_ETHER) {
715 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000716 tunnel->dev->stats.rx_length_errors++;
717 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700718 goto drop;
719 }
720
721 iph = ip_hdr(skb);
722 skb->protocol = eth_type_trans(skb, tunnel->dev);
723 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
724 }
725
Eric Dumazete985aad2010-09-27 03:57:11 +0000726 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000727 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000728 tstats->rx_packets++;
729 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000730 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000731
732 __skb_tunnel_rx(skb, tunnel->dev);
Herbert Xue1a80002008-10-09 12:00:17 -0700733
734 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700736
Eric Dumazetcaf586e2010-09-30 21:06:55 +0000737 netif_rx(skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000738
Eric Dumazet8990f462010-09-20 00:12:11 +0000739 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700741 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
743drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000745 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746}
747
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000748static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749{
Patrick McHardy2941a482006-01-08 22:05:26 -0800750 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazete985aad2010-09-27 03:57:11 +0000751 struct pcpu_tstats *tstats;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000752 const struct iphdr *old_iph = ip_hdr(skb);
753 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700754 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800756 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000758 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700760 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800762 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 int mtu;
764
Eric Dumazet6b78f162012-09-13 21:25:33 +0000765 if (skb->ip_summed == CHECKSUM_PARTIAL &&
766 skb_checksum_help(skb))
767 goto tx_error;
768
Herbert Xue1a80002008-10-09 12:00:17 -0700769 if (dev->type == ARPHRD_ETHER)
770 IPCB(skb)->flags = 0;
771
772 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000774 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 } else {
776 gre_hlen = tunnel->hlen;
777 tiph = &tunnel->parms.iph;
778 }
779
780 if ((dst = tiph->daddr) == 0) {
781 /* NBMA tunnel */
782
Eric Dumazetadf30902009-06-02 05:19:30 +0000783 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000784 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 goto tx_error;
786 }
787
David S. Miller61d57f82012-01-24 18:23:30 -0500788 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000789 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700790 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500791 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000792#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000794 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800795 struct neighbour *neigh;
796 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798
David S. Miller0ec88662012-01-27 15:01:08 -0800799 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 if (neigh == NULL)
801 goto tx_error;
802
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000803 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 addr_type = ipv6_addr_type(addr6);
805
806 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700807 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 addr_type = ipv6_addr_type(addr6);
809 }
810
811 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800812 do_tx_error_icmp = true;
813 else {
814 do_tx_error_icmp = false;
815 dst = addr6->s6_addr32[3];
816 }
817 neigh_release(neigh);
818 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 }
821#endif
822 else
823 goto tx_error;
824 }
825
826 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700827 if (tos == 1) {
828 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 if (skb->protocol == htons(ETH_P_IP))
830 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700831 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000832 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 }
834
David S. Millercbb1e852011-05-04 12:33:34 -0700835 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500836 tunnel->parms.o_key, RT_TOS(tos),
837 tunnel->parms.link);
838 if (IS_ERR(rt)) {
839 dev->stats.tx_carrier_errors++;
840 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700842 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843
844 if (tdev == dev) {
845 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000846 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 goto tx_error;
848 }
849
850 df = tiph->frag_off;
851 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700852 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000854 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855
Eric Dumazetadf30902009-06-02 05:19:30 +0000856 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700857 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858
859 if (skb->protocol == htons(ETH_P_IP)) {
860 df |= (old_iph->frag_off&htons(IP_DF));
861
862 if ((old_iph->frag_off&htons(IP_DF)) &&
863 mtu < ntohs(old_iph->tot_len)) {
864 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
865 ip_rt_put(rt);
866 goto tx_error;
867 }
868 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000869#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000871 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
Eric Dumazetadf30902009-06-02 05:19:30 +0000873 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800874 if ((tunnel->parms.iph.daddr &&
875 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 rt6->rt6i_dst.plen == 128) {
877 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800878 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 }
880 }
881
882 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000883 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 ip_rt_put(rt);
885 goto tx_error;
886 }
887 }
888#endif
889
890 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800891 if (time_before(jiffies,
892 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 tunnel->err_count--;
894
895 dst_link_failure(skb);
896 } else
897 tunnel->err_count = 0;
898 }
899
Changli Gaod8d1f302010-06-10 23:31:35 -0700900 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901
Patrick McHardycfbba492007-07-09 15:33:40 -0700902 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
903 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000905 if (max_headroom > dev->needed_headroom)
906 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 if (!new_skb) {
908 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000909 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000911 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 }
913 if (skb->sk)
914 skb_set_owner_w(new_skb, skb->sk);
915 dev_kfree_skb(skb);
916 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700917 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 }
919
Herbert Xu64194c32008-10-09 12:03:17 -0700920 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700921 skb_push(skb, gre_hlen);
922 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800924 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
925 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000926 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700927 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928
929 /*
930 * Push down and install the IPIP header.
931 */
932
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700933 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 iph->version = 4;
935 iph->ihl = sizeof(struct iphdr) >> 2;
936 iph->frag_off = df;
937 iph->protocol = IPPROTO_GRE;
938 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700939 iph->daddr = fl4.daddr;
940 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
942 if ((iph->ttl = tiph->ttl) == 0) {
943 if (skb->protocol == htons(ETH_P_IP))
944 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000945#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000947 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948#endif
949 else
David S. Miller323e1262010-12-12 21:55:08 -0800950 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 }
952
Herbert Xue1a80002008-10-09 12:00:17 -0700953 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
954 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
955 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
957 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000958 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959
960 if (tunnel->parms.o_flags&GRE_SEQ) {
961 ++tunnel->o_seqno;
962 *ptr = htonl(tunnel->o_seqno);
963 ptr--;
964 }
965 if (tunnel->parms.o_flags&GRE_KEY) {
966 *ptr = tunnel->parms.o_key;
967 ptr--;
968 }
969 if (tunnel->parms.o_flags&GRE_CSUM) {
970 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000971 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 }
973 }
974
975 nf_reset(skb);
Eric Dumazete985aad2010-09-27 03:57:11 +0000976 tstats = this_cpu_ptr(dev->tstats);
977 __IPTUNNEL_XMIT(tstats, &dev->stats);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000978 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979
David S. Miller496053f2012-01-11 16:46:32 -0800980#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981tx_error_icmp:
982 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800983#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000985 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000987 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988}
989
Herbert Xu42aa9162008-10-09 11:59:32 -0700990static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800991{
992 struct net_device *tdev = NULL;
993 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000994 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800995 int hlen = LL_MAX_HEADER;
996 int mtu = ETH_DATA_LEN;
997 int addend = sizeof(struct iphdr) + 4;
998
999 tunnel = netdev_priv(dev);
1000 iph = &tunnel->parms.iph;
1001
Herbert Xuc95b8192008-10-09 11:58:54 -07001002 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001003
1004 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -07001005 struct flowi4 fl4;
1006 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001007
David S. Millercbb1e852011-05-04 12:33:34 -07001008 rt = ip_route_output_gre(dev_net(dev), &fl4,
1009 iph->daddr, iph->saddr,
1010 tunnel->parms.o_key,
1011 RT_TOS(iph->tos),
1012 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001013 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001014 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001015 ip_rt_put(rt);
1016 }
Herbert Xue1a80002008-10-09 12:00:17 -07001017
1018 if (dev->type != ARPHRD_ETHER)
1019 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001020 }
1021
1022 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001023 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001024
1025 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001026 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001027 mtu = tdev->mtu;
1028 }
1029 dev->iflink = tunnel->parms.link;
1030
1031 /* Precalculate GRE options length */
1032 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1033 if (tunnel->parms.o_flags&GRE_CSUM)
1034 addend += 4;
1035 if (tunnel->parms.o_flags&GRE_KEY)
1036 addend += 4;
1037 if (tunnel->parms.o_flags&GRE_SEQ)
1038 addend += 4;
1039 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001040 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001041 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001042
1043 if (mtu < 68)
1044 mtu = 68;
1045
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001046 tunnel->hlen = addend;
1047
Herbert Xu42aa9162008-10-09 11:59:32 -07001048 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001049}
1050
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051static int
1052ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1053{
1054 int err = 0;
1055 struct ip_tunnel_parm p;
1056 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001057 struct net *net = dev_net(dev);
1058 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059
1060 switch (cmd) {
1061 case SIOCGETTUNNEL:
1062 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001063 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1065 err = -EFAULT;
1066 break;
1067 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001068 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 }
1070 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001071 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 memcpy(&p, &t->parms, sizeof(p));
1073 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1074 err = -EFAULT;
1075 break;
1076
1077 case SIOCADDTUNNEL:
1078 case SIOCCHGTUNNEL:
1079 err = -EPERM;
1080 if (!capable(CAP_NET_ADMIN))
1081 goto done;
1082
1083 err = -EFAULT;
1084 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1085 goto done;
1086
1087 err = -EINVAL;
1088 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1089 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1090 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1091 goto done;
1092 if (p.iph.ttl)
1093 p.iph.frag_off |= htons(IP_DF);
1094
1095 if (!(p.i_flags&GRE_KEY))
1096 p.i_key = 0;
1097 if (!(p.o_flags&GRE_KEY))
1098 p.o_key = 0;
1099
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001100 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001102 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 if (t != NULL) {
1104 if (t->dev != dev) {
1105 err = -EEXIST;
1106 break;
1107 }
1108 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001109 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110
Patrick McHardy2941a482006-01-08 22:05:26 -08001111 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
Joe Perchesf97c1e02007-12-16 13:45:43 -08001113 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 nflags = IFF_BROADCAST;
1115 else if (p.iph.daddr)
1116 nflags = IFF_POINTOPOINT;
1117
1118 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1119 err = -EINVAL;
1120 break;
1121 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001122 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001123 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 t->parms.iph.saddr = p.iph.saddr;
1125 t->parms.iph.daddr = p.iph.daddr;
1126 t->parms.i_key = p.i_key;
1127 t->parms.o_key = p.o_key;
1128 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1129 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001130 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 netdev_state_change(dev);
1132 }
1133 }
1134
1135 if (t) {
1136 err = 0;
1137 if (cmd == SIOCCHGTUNNEL) {
1138 t->parms.iph.ttl = p.iph.ttl;
1139 t->parms.iph.tos = p.iph.tos;
1140 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001141 if (t->parms.link != p.link) {
1142 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001143 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001144 netdev_state_change(dev);
1145 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 }
1147 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1148 err = -EFAULT;
1149 } else
1150 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1151 break;
1152
1153 case SIOCDELTUNNEL:
1154 err = -EPERM;
1155 if (!capable(CAP_NET_ADMIN))
1156 goto done;
1157
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001158 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 err = -EFAULT;
1160 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1161 goto done;
1162 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001163 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 goto done;
1165 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001166 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 goto done;
1168 dev = t->dev;
1169 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001170 unregister_netdevice(dev);
1171 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 break;
1173
1174 default:
1175 err = -EINVAL;
1176 }
1177
1178done:
1179 return err;
1180}
1181
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1183{
Patrick McHardy2941a482006-01-08 22:05:26 -08001184 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001185 if (new_mtu < 68 ||
1186 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 return -EINVAL;
1188 dev->mtu = new_mtu;
1189 return 0;
1190}
1191
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1220
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001221static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1222 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001223 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224{
Patrick McHardy2941a482006-01-08 22:05:26 -08001225 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001227 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
1229 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1230 p[0] = t->parms.o_flags;
1231 p[1] = htons(type);
1232
1233 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001234 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001236
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 if (saddr)
1238 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001239 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001241 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001243
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 return -t->hlen;
1245}
1246
Timo Teras6a5f44d2007-10-23 20:31:53 -07001247static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1248{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001249 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001250 memcpy(haddr, &iph->saddr, 4);
1251 return 4;
1252}
1253
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001254static const struct header_ops ipgre_header_ops = {
1255 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001256 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001257};
1258
Timo Teras6a5f44d2007-10-23 20:31:53 -07001259#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260static int ipgre_open(struct net_device *dev)
1261{
Patrick McHardy2941a482006-01-08 22:05:26 -08001262 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Joe Perchesf97c1e02007-12-16 13:45:43 -08001264 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001265 struct flowi4 fl4;
1266 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001267
David S. Millercbb1e852011-05-04 12:33:34 -07001268 rt = ip_route_output_gre(dev_net(dev), &fl4,
1269 t->parms.iph.daddr,
1270 t->parms.iph.saddr,
1271 t->parms.o_key,
1272 RT_TOS(t->parms.iph.tos),
1273 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001274 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001276 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001278 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 return -EADDRNOTAVAIL;
1280 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001281 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 }
1283 return 0;
1284}
1285
1286static int ipgre_close(struct net_device *dev)
1287{
Patrick McHardy2941a482006-01-08 22:05:26 -08001288 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001289
Joe Perchesf97c1e02007-12-16 13:45:43 -08001290 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001291 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001292 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001293 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 }
1296 return 0;
1297}
1298
1299#endif
1300
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001301static const struct net_device_ops ipgre_netdev_ops = {
1302 .ndo_init = ipgre_tunnel_init,
1303 .ndo_uninit = ipgre_tunnel_uninit,
1304#ifdef CONFIG_NET_IPGRE_BROADCAST
1305 .ndo_open = ipgre_open,
1306 .ndo_stop = ipgre_close,
1307#endif
1308 .ndo_start_xmit = ipgre_tunnel_xmit,
1309 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1310 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001311 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001312};
1313
Eric Dumazete985aad2010-09-27 03:57:11 +00001314static void ipgre_dev_free(struct net_device *dev)
1315{
1316 free_percpu(dev->tstats);
1317 free_netdev(dev);
1318}
1319
/* Offload features advertised in both dev->features and dev->hw_features. */
#define GRE_FEATURES (NETIF_F_HW_CSUM	| \
		      NETIF_F_HIGHDMA	| \
		      NETIF_F_FRAGLIST	| \
		      NETIF_F_SG)
1324
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325static void ipgre_tunnel_setup(struct net_device *dev)
1326{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001327 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001328 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329
1330 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001331 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001332 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 dev->flags = IFF_NOARP;
1334 dev->iflink = 0;
1335 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001336 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001337 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001338
1339 dev->features |= GRE_FEATURES;
1340 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341}
1342
1343static int ipgre_tunnel_init(struct net_device *dev)
1344{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 struct ip_tunnel *tunnel;
1346 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347
Patrick McHardy2941a482006-01-08 22:05:26 -08001348 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 iph = &tunnel->parms.iph;
1350
1351 tunnel->dev = dev;
1352 strcpy(tunnel->parms.name, dev->name);
1353
1354 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1355 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1356
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001359 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 if (!iph->saddr)
1361 return -EINVAL;
1362 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001363 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 }
1365#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001366 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001367 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368
Eric Dumazete985aad2010-09-27 03:57:11 +00001369 dev->tstats = alloc_percpu(struct pcpu_tstats);
1370 if (!dev->tstats)
1371 return -ENOMEM;
1372
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 return 0;
1374}
1375
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001376static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377{
Patrick McHardy2941a482006-01-08 22:05:26 -08001378 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 struct iphdr *iph = &tunnel->parms.iph;
1380
1381 tunnel->dev = dev;
1382 strcpy(tunnel->parms.name, dev->name);
1383
1384 iph->version = 4;
1385 iph->protocol = IPPROTO_GRE;
1386 iph->ihl = 5;
1387 tunnel->hlen = sizeof(struct iphdr) + 4;
1388
1389 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390}
1391
1392
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001393static const struct gre_protocol ipgre_protocol = {
1394 .handler = ipgre_rcv,
1395 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396};
1397
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001398static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001399{
1400 int prio;
1401
1402 for (prio = 0; prio < 4; prio++) {
1403 int h;
1404 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001405 struct ip_tunnel *t;
1406
1407 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001408
1409 while (t != NULL) {
1410 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001411 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001412 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001413 }
1414 }
1415}
1416
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001417static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001418{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001419 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001420 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001421
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001422 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1423 ipgre_tunnel_setup);
1424 if (!ign->fb_tunnel_dev) {
1425 err = -ENOMEM;
1426 goto err_alloc_dev;
1427 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001428 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001429
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001430 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001431 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001432
1433 if ((err = register_netdev(ign->fb_tunnel_dev)))
1434 goto err_reg_dev;
1435
Eric Dumazet3285ee32010-10-30 16:21:28 -07001436 rcu_assign_pointer(ign->tunnels_wc[0],
1437 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001438 return 0;
1439
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001440err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001441 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001442err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001443 return err;
1444}
1445
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001446static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001447{
1448 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001449 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001450
1451 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001452 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001453 ipgre_destroy_tunnels(ign, &list);
1454 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001455 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001456}
1457
1458static struct pernet_operations ipgre_net_ops = {
1459 .init = ipgre_init_net,
1460 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001461 .id = &ipgre_net_id,
1462 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001463};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464
Herbert Xuc19e6542008-10-09 11:59:55 -07001465static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1466{
1467 __be16 flags;
1468
1469 if (!data)
1470 return 0;
1471
1472 flags = 0;
1473 if (data[IFLA_GRE_IFLAGS])
1474 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1475 if (data[IFLA_GRE_OFLAGS])
1476 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1477 if (flags & (GRE_VERSION|GRE_ROUTING))
1478 return -EINVAL;
1479
1480 return 0;
1481}
1482
Herbert Xue1a80002008-10-09 12:00:17 -07001483static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1484{
1485 __be32 daddr;
1486
1487 if (tb[IFLA_ADDRESS]) {
1488 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1489 return -EINVAL;
1490 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1491 return -EADDRNOTAVAIL;
1492 }
1493
1494 if (!data)
1495 goto out;
1496
1497 if (data[IFLA_GRE_REMOTE]) {
1498 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1499 if (!daddr)
1500 return -EINVAL;
1501 }
1502
1503out:
1504 return ipgre_tunnel_validate(tb, data);
1505}
1506
Herbert Xuc19e6542008-10-09 11:59:55 -07001507static void ipgre_netlink_parms(struct nlattr *data[],
1508 struct ip_tunnel_parm *parms)
1509{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001510 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001511
1512 parms->iph.protocol = IPPROTO_GRE;
1513
1514 if (!data)
1515 return;
1516
1517 if (data[IFLA_GRE_LINK])
1518 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1519
1520 if (data[IFLA_GRE_IFLAGS])
1521 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1522
1523 if (data[IFLA_GRE_OFLAGS])
1524 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1525
1526 if (data[IFLA_GRE_IKEY])
1527 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1528
1529 if (data[IFLA_GRE_OKEY])
1530 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1531
1532 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001533 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001534
1535 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001536 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001537
1538 if (data[IFLA_GRE_TTL])
1539 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1540
1541 if (data[IFLA_GRE_TOS])
1542 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1543
1544 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1545 parms->iph.frag_off = htons(IP_DF);
1546}
1547
Herbert Xue1a80002008-10-09 12:00:17 -07001548static int ipgre_tap_init(struct net_device *dev)
1549{
1550 struct ip_tunnel *tunnel;
1551
1552 tunnel = netdev_priv(dev);
1553
1554 tunnel->dev = dev;
1555 strcpy(tunnel->parms.name, dev->name);
1556
1557 ipgre_tunnel_bind_dev(dev);
1558
Eric Dumazete985aad2010-09-27 03:57:11 +00001559 dev->tstats = alloc_percpu(struct pcpu_tstats);
1560 if (!dev->tstats)
1561 return -ENOMEM;
1562
Herbert Xue1a80002008-10-09 12:00:17 -07001563 return 0;
1564}
1565
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001566static const struct net_device_ops ipgre_tap_netdev_ops = {
1567 .ndo_init = ipgre_tap_init,
1568 .ndo_uninit = ipgre_tunnel_uninit,
1569 .ndo_start_xmit = ipgre_tunnel_xmit,
1570 .ndo_set_mac_address = eth_mac_addr,
1571 .ndo_validate_addr = eth_validate_addr,
1572 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001573 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001574};
1575
Herbert Xue1a80002008-10-09 12:00:17 -07001576static void ipgre_tap_setup(struct net_device *dev)
1577{
1578
1579 ether_setup(dev);
1580
Herbert Xu2e9526b2009-10-30 05:51:48 +00001581 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001582 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001583
1584 dev->iflink = 0;
1585 dev->features |= NETIF_F_NETNS_LOCAL;
1586}
1587
Eric W. Biederman81adee42009-11-08 00:53:51 -08001588static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001589 struct nlattr *data[])
1590{
1591 struct ip_tunnel *nt;
1592 struct net *net = dev_net(dev);
1593 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1594 int mtu;
1595 int err;
1596
1597 nt = netdev_priv(dev);
1598 ipgre_netlink_parms(data, &nt->parms);
1599
Herbert Xue1a80002008-10-09 12:00:17 -07001600 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001601 return -EEXIST;
1602
Herbert Xue1a80002008-10-09 12:00:17 -07001603 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001604 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001605
Herbert Xuc19e6542008-10-09 11:59:55 -07001606 mtu = ipgre_tunnel_bind_dev(dev);
1607 if (!tb[IFLA_MTU])
1608 dev->mtu = mtu;
1609
Eric Dumazetb790e012010-09-27 23:05:47 +00001610 /* Can use a lockless transmit, unless we generate output sequences */
1611 if (!(nt->parms.o_flags & GRE_SEQ))
1612 dev->features |= NETIF_F_LLTX;
1613
Herbert Xuc19e6542008-10-09 11:59:55 -07001614 err = register_netdevice(dev);
1615 if (err)
1616 goto out;
1617
1618 dev_hold(dev);
1619 ipgre_tunnel_link(ign, nt);
1620
1621out:
1622 return err;
1623}
1624
1625static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1626 struct nlattr *data[])
1627{
1628 struct ip_tunnel *t, *nt;
1629 struct net *net = dev_net(dev);
1630 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1631 struct ip_tunnel_parm p;
1632 int mtu;
1633
1634 if (dev == ign->fb_tunnel_dev)
1635 return -EINVAL;
1636
1637 nt = netdev_priv(dev);
1638 ipgre_netlink_parms(data, &p);
1639
1640 t = ipgre_tunnel_locate(net, &p, 0);
1641
1642 if (t) {
1643 if (t->dev != dev)
1644 return -EEXIST;
1645 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001646 t = nt;
1647
Herbert Xu2e9526b2009-10-30 05:51:48 +00001648 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001649 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001650
Herbert Xu2e9526b2009-10-30 05:51:48 +00001651 if (ipv4_is_multicast(p.iph.daddr))
1652 nflags = IFF_BROADCAST;
1653 else if (p.iph.daddr)
1654 nflags = IFF_POINTOPOINT;
1655
1656 if ((dev->flags ^ nflags) &
1657 (IFF_POINTOPOINT | IFF_BROADCAST))
1658 return -EINVAL;
1659 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001660
1661 ipgre_tunnel_unlink(ign, t);
1662 t->parms.iph.saddr = p.iph.saddr;
1663 t->parms.iph.daddr = p.iph.daddr;
1664 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001665 if (dev->type != ARPHRD_ETHER) {
1666 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1667 memcpy(dev->broadcast, &p.iph.daddr, 4);
1668 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001669 ipgre_tunnel_link(ign, t);
1670 netdev_state_change(dev);
1671 }
1672
1673 t->parms.o_key = p.o_key;
1674 t->parms.iph.ttl = p.iph.ttl;
1675 t->parms.iph.tos = p.iph.tos;
1676 t->parms.iph.frag_off = p.iph.frag_off;
1677
1678 if (t->parms.link != p.link) {
1679 t->parms.link = p.link;
1680 mtu = ipgre_tunnel_bind_dev(dev);
1681 if (!tb[IFLA_MTU])
1682 dev->mtu = mtu;
1683 netdev_state_change(dev);
1684 }
1685
1686 return 0;
1687}
1688
1689static size_t ipgre_get_size(const struct net_device *dev)
1690{
1691 return
1692 /* IFLA_GRE_LINK */
1693 nla_total_size(4) +
1694 /* IFLA_GRE_IFLAGS */
1695 nla_total_size(2) +
1696 /* IFLA_GRE_OFLAGS */
1697 nla_total_size(2) +
1698 /* IFLA_GRE_IKEY */
1699 nla_total_size(4) +
1700 /* IFLA_GRE_OKEY */
1701 nla_total_size(4) +
1702 /* IFLA_GRE_LOCAL */
1703 nla_total_size(4) +
1704 /* IFLA_GRE_REMOTE */
1705 nla_total_size(4) +
1706 /* IFLA_GRE_TTL */
1707 nla_total_size(1) +
1708 /* IFLA_GRE_TOS */
1709 nla_total_size(1) +
1710 /* IFLA_GRE_PMTUDISC */
1711 nla_total_size(1) +
1712 0;
1713}
1714
1715static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1716{
1717 struct ip_tunnel *t = netdev_priv(dev);
1718 struct ip_tunnel_parm *p = &t->parms;
1719
David S. Millerf3756b72012-04-01 20:39:02 -04001720 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1721 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1722 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1723 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1724 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1725 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1726 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1727 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1728 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1729 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1730 !!(p->iph.frag_off & htons(IP_DF))))
1731 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001732 return 0;
1733
1734nla_put_failure:
1735 return -EMSGSIZE;
1736}
1737
1738static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1739 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1740 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1741 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1742 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1743 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001744 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1745 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001746 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1747 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1748 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1749};
1750
1751static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1752 .kind = "gre",
1753 .maxtype = IFLA_GRE_MAX,
1754 .policy = ipgre_policy,
1755 .priv_size = sizeof(struct ip_tunnel),
1756 .setup = ipgre_tunnel_setup,
1757 .validate = ipgre_tunnel_validate,
1758 .newlink = ipgre_newlink,
1759 .changelink = ipgre_changelink,
1760 .get_size = ipgre_get_size,
1761 .fill_info = ipgre_fill_info,
1762};
1763
Herbert Xue1a80002008-10-09 12:00:17 -07001764static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1765 .kind = "gretap",
1766 .maxtype = IFLA_GRE_MAX,
1767 .policy = ipgre_policy,
1768 .priv_size = sizeof(struct ip_tunnel),
1769 .setup = ipgre_tap_setup,
1770 .validate = ipgre_tap_validate,
1771 .newlink = ipgre_newlink,
1772 .changelink = ipgre_changelink,
1773 .get_size = ipgre_get_size,
1774 .fill_info = ipgre_fill_info,
1775};
1776
/*
 *	And now the module code and kernel interface.
 */
1780
1781static int __init ipgre_init(void)
1782{
1783 int err;
1784
Joe Perches058bd4d2012-03-11 18:36:11 +00001785 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001787 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001788 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001789 return err;
1790
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001791 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001792 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001793 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001794 goto add_proto_failed;
1795 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001796
Herbert Xuc19e6542008-10-09 11:59:55 -07001797 err = rtnl_link_register(&ipgre_link_ops);
1798 if (err < 0)
1799 goto rtnl_link_failed;
1800
Herbert Xue1a80002008-10-09 12:00:17 -07001801 err = rtnl_link_register(&ipgre_tap_ops);
1802 if (err < 0)
1803 goto tap_ops_failed;
1804
Herbert Xuc19e6542008-10-09 11:59:55 -07001805out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001807
Herbert Xue1a80002008-10-09 12:00:17 -07001808tap_ops_failed:
1809 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001810rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001811 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001812add_proto_failed:
1813 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001814 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815}
1816
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001817static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818{
Herbert Xue1a80002008-10-09 12:00:17 -07001819 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001820 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001821 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001822 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001823 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824}
1825
1826module_init(ipgre_init);
1827module_exit(ipgre_fini);
1828MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001829MODULE_ALIAS_RTNL_LINK("gre");
1830MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9a2011-03-02 00:33:13 +03001831MODULE_ALIAS_NETDEV("gre0");