blob: f233c1da20771df78613f22c7389e0e4aefc2b5e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
Eric Dumazet6d0722a2010-09-29 23:35:10 -070069 and silently drop packet when it expires. It is a good
stephen hemmingerbff52852012-02-24 08:08:20 +000070 solution, but it supposes maintaining new variable in ALL
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 skb, even if no tunneling is used.
72
Eric Dumazet6d0722a2010-09-29 23:35:10 -070073 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially when
   taking fragmentation into account. To be short, ttl is not a solution at all.
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
stephen hemmingerbff52852012-02-24 08:08:20 +0000103 rapidly degrades to value <68, where looping stops.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov.
121 */
122
Herbert Xuc19e6542008-10-09 11:59:55 -0700123static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ipgre_tunnel_init(struct net_device *dev);
125static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700126static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
128/* Fallback tunnel: no source, no destination, no key, no options */
129
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130#define HASH_SIZE 16
131
Eric Dumazetf99189b2009-11-17 10:42:49 +0000132static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700136 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137};
138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139/* Tunnel hash table */
140
141/*
142 4 hash tables:
143
144 3: (remote,local)
145 2: (remote,*)
146 1: (*,local)
147 0: (*,*)
148
149 We require exact key match i.e. if a key is present in packet
150 it will match only tunnel with the same key; if it is not present,
151 it will match only keyless tunnel.
152
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
155 */
156
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The 0xF mask corresponds to HASH_SIZE == 16 and must be kept in sync
 * with it.  The argument is fully parenthesized so that expression
 * arguments (e.g. HASH(a ^ b)) are cast and shifted as a whole; note that
 * it is evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700159#define tunnels_r_l tunnels[3]
160#define tunnels_r tunnels[2]
161#define tunnels_l tunnels[1]
162#define tunnels_wc tunnels[0]
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000163/*
Eric Dumazet15078502010-09-15 11:07:53 +0000164 * Locking : hash tables are protected by RCU and RTNL
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000165 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000167#define for_each_ip_tunnel_rcu(start) \
168 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Eric Dumazete985aad2010-09-27 03:57:11 +0000170/* often modified stats are per cpu, other are shared (netdev->stats) */
171struct pcpu_tstats {
stephen hemminger87b6d212012-04-12 06:31:16 +0000172 u64 rx_packets;
173 u64 rx_bytes;
174 u64 tx_packets;
175 u64 tx_bytes;
176 struct u64_stats_sync syncp;
177};
Eric Dumazete985aad2010-09-27 03:57:11 +0000178
stephen hemminger87b6d212012-04-12 06:31:16 +0000179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000181{
Eric Dumazete985aad2010-09-27 03:57:11 +0000182 int i;
183
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
187 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000188
stephen hemminger87b6d212012-04-12 06:31:16 +0000189 do {
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
196
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000201 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000202
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
213
214 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000215}
216
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217/* Given src, dst and key, find appropriate for input tunnel. */
218
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000219static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
220 __be32 remote, __be32 local,
221 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222{
Timo Teras749c10f2009-01-19 17:22:12 -0800223 struct net *net = dev_net(dev);
224 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000225 unsigned int h0 = HASH(remote);
226 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800227 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700228 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700229 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
230 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800231 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000233 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800234 if (local != t->parms.iph.saddr ||
235 remote != t->parms.iph.daddr ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
Timo Terasafcf1242009-01-26 20:56:10 -0800244 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800245 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800246 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800247 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800248 score |= 2;
249 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800250 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 }
Herbert Xue1a80002008-10-09 12:00:17 -0700257
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000258 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800259 if (remote != t->parms.iph.daddr ||
260 key != t->parms.i_key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
Timo Terasafcf1242009-01-26 20:56:10 -0800268 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800269 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800270 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800271 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800272 score |= 2;
273 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800274 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
Herbert Xue1a80002008-10-09 12:00:17 -0700281
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000282 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800283 if ((local != t->parms.iph.saddr &&
284 (local != t->parms.iph.daddr ||
285 !ipv4_is_multicast(local))) ||
286 key != t->parms.i_key ||
287 !(t->dev->flags & IFF_UP))
288 continue;
289
290 if (t->dev->type != ARPHRD_IPGRE &&
291 t->dev->type != dev_type)
292 continue;
293
Timo Terasafcf1242009-01-26 20:56:10 -0800294 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800295 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800296 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800297 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800298 score |= 2;
299 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800300 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800301
302 if (score < cand_score) {
303 cand = t;
304 cand_score = score;
305 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 }
Herbert Xue1a80002008-10-09 12:00:17 -0700307
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000308 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800309 if (t->parms.i_key != key ||
310 !(t->dev->flags & IFF_UP))
311 continue;
312
313 if (t->dev->type != ARPHRD_IPGRE &&
314 t->dev->type != dev_type)
315 continue;
316
Timo Terasafcf1242009-01-26 20:56:10 -0800317 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800318 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800319 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800320 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800321 score |= 2;
322 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800323 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800324
325 if (score < cand_score) {
326 cand = t;
327 cand_score = score;
328 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 }
330
Timo Terasafcf1242009-01-26 20:56:10 -0800331 if (cand != NULL)
332 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700333
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000334 dev = ign->fb_tunnel_dev;
335 if (dev->flags & IFF_UP)
336 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800337
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 return NULL;
339}
340
Eric Dumazet15078502010-09-15 11:07:53 +0000341static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700342 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000347 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 int prio = 0;
349
350 if (local)
351 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800352 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 prio |= 2;
354 h ^= HASH(remote);
355 }
356
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700357 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358}
359
Eric Dumazet15078502010-09-15 11:07:53 +0000360static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700361 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900362{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700363 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900364}
365
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700366static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367{
Eric Dumazet15078502010-09-15 11:07:53 +0000368 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
Eric Dumazet15078502010-09-15 11:07:53 +0000370 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000371 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372}
373
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700374static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375{
Eric Dumazet15078502010-09-15 11:07:53 +0000376 struct ip_tunnel __rcu **tp;
377 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
Eric Dumazet15078502010-09-15 11:07:53 +0000379 for (tp = ipgre_bucket(ign, t);
380 (iter = rtnl_dereference(*tp)) != NULL;
381 tp = &iter->next) {
382 if (t == iter) {
383 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 break;
385 }
386 }
387}
388
Herbert Xue1a80002008-10-09 12:00:17 -0700389static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
390 struct ip_tunnel_parm *parms,
391 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392{
Al Virod5a0a1e2006-11-08 00:23:14 -0800393 __be32 remote = parms->iph.daddr;
394 __be32 local = parms->iph.saddr;
395 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800396 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000397 struct ip_tunnel *t;
398 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700399 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
400
Eric Dumazet15078502010-09-15 11:07:53 +0000401 for (tp = __ipgre_bucket(ign, parms);
402 (t = rtnl_dereference(*tp)) != NULL;
403 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700404 if (local == t->parms.iph.saddr &&
405 remote == t->parms.iph.daddr &&
406 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800407 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700408 type == t->dev->type)
409 break;
410
411 return t;
412}
413
Eric Dumazet15078502010-09-15 11:07:53 +0000414static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700415 struct ip_tunnel_parm *parms, int create)
416{
417 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
Herbert Xue1a80002008-10-09 12:00:17 -0700422 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
423 if (t || !create)
424 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
426 if (parms->name[0])
427 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800428 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000429 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
431 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
432 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000433 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700435 dev_net_set(dev, net);
436
Patrick McHardy2941a482006-01-08 22:05:26 -0800437 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700439 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
Herbert Xu42aa9162008-10-09 11:59:32 -0700441 dev->mtu = ipgre_tunnel_bind_dev(dev);
442
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800443 if (register_netdevice(dev) < 0)
444 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000446 /* Can use a lockless transmit, unless we generate output sequences */
447 if (!(nt->parms.o_flags & GRE_SEQ))
448 dev->features |= NETIF_F_LLTX;
449
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700451 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 return nt;
453
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800454failed_free:
455 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 return NULL;
457}
458
459static void ipgre_tunnel_uninit(struct net_device *dev)
460{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700461 struct net *net = dev_net(dev);
462 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
463
464 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 dev_put(dev);
466}
467
468
469static void ipgre_err(struct sk_buff *skb, u32 info)
470{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471
Rami Rosen071f92d2008-05-21 17:47:54 -0700472/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 8 bytes of packet payload. It means, that precise relaying of
474 ICMP in the real Internet is absolutely infeasible.
475
476 Moreover, Cisco "wise men" put GRE key to the third word
477 in GRE header. It makes impossible maintaining even soft state for keyed
478 GRE tunnels with enabled checksum. Tell them "thank you".
479
480 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000481 what the hell these idiots break standards established
482 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 */
484
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000485 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000486 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300488 const int type = icmp_hdr(skb)->type;
489 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800491 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
493 flags = p[0];
494 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
495 if (flags&(GRE_VERSION|GRE_ROUTING))
496 return;
497 if (flags&GRE_KEY) {
498 grehlen += 4;
499 if (flags&GRE_CSUM)
500 grehlen += 4;
501 }
502 }
503
504 /* If only 8 bytes returned, keyed message will be dropped here */
505 if (skb_headlen(skb) < grehlen)
506 return;
507
508 switch (type) {
509 default:
510 case ICMP_PARAMETERPROB:
511 return;
512
513 case ICMP_DEST_UNREACH:
514 switch (code) {
515 case ICMP_SR_FAILED:
516 case ICMP_PORT_UNREACH:
517 /* Impossible event. */
518 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 default:
520 /* All others are translated to HOST_UNREACH.
521 rfc2003 contains "deep thoughts" about NET_UNREACH,
522 I believe they are just ether pollution. --ANK
523 */
524 break;
525 }
526 break;
527 case ICMP_TIME_EXCEEDED:
528 if (code != ICMP_EXC_TTL)
529 return;
530 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700531
532 case ICMP_REDIRECT:
533 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 }
535
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000536 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800537 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700538 flags & GRE_KEY ?
539 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
540 p[1]);
David S. Miller36393392012-06-14 22:21:46 -0700541 if (t == NULL)
542 goto out;
543
544 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
545 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
546 t->parms.link, 0, IPPROTO_GRE, 0);
547 goto out;
548 }
David S. Miller55be7a92012-07-11 21:27:49 -0700549 if (type == ICMP_REDIRECT) {
550 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
551 IPPROTO_GRE, 0);
552 goto out;
553 }
David S. Miller36393392012-06-14 22:21:46 -0700554 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800555 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 goto out;
557
558 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
559 goto out;
560
Wei Yongjunda6185d82009-02-24 23:34:48 -0800561 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 t->err_count++;
563 else
564 t->err_count = 1;
565 t->err_time = jiffies;
566out:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000567 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568}
569
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000570static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571{
572 if (INET_ECN_is_ce(iph->tos)) {
573 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700574 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700576 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 }
578 }
579}
580
581static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583{
584 u8 inner = 0;
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return INET_ECN_encapsulate(tos, inner);
590}
591
592static int ipgre_rcv(struct sk_buff *skb)
593{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000594 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800596 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800597 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800598 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 u32 seqno = 0;
600 struct ip_tunnel *tunnel;
601 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700602 __be16 gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
604 if (!pskb_may_pull(skb, 16))
605 goto drop_nolock;
606
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700607 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000609 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
611 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
612 /* - Version must be 0.
613 - We do not support routing headers.
614 */
615 if (flags&(GRE_VERSION|GRE_ROUTING))
616 goto drop_nolock;
617
618 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800619 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700620 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800621 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800622 if (!csum)
623 break;
624 /* fall through */
625 case CHECKSUM_NONE:
626 skb->csum = 0;
627 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700628 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 }
630 offset += 4;
631 }
632 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000633 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 offset += 4;
635 }
636 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000637 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 offset += 4;
639 }
640 }
641
Herbert Xue1a80002008-10-09 12:00:17 -0700642 gre_proto = *(__be16 *)(h + 2);
643
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000644 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800645 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700646 iph->saddr, iph->daddr, key,
647 gre_proto))) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000648 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700649
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 secpath_reset(skb);
651
Herbert Xue1a80002008-10-09 12:00:17 -0700652 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 /* WCCP version 1 and 2 protocol decoding.
654 * - Change protocol to IP
655 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
656 */
Herbert Xue1a80002008-10-09 12:00:17 -0700657 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700658 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900659 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 offset += 4;
661 }
662
Timo Teras1d069162007-12-20 00:10:33 -0800663 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300664 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700665 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 skb->pkt_type = PACKET_HOST;
667#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800668 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800670 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000672 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 skb->pkt_type = PACKET_BROADCAST;
674 }
675#endif
676
677 if (((flags&GRE_CSUM) && csum) ||
678 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000679 tunnel->dev->stats.rx_crc_errors++;
680 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 goto drop;
682 }
683 if (tunnel->parms.i_flags&GRE_SEQ) {
684 if (!(flags&GRE_SEQ) ||
685 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000686 tunnel->dev->stats.rx_fifo_errors++;
687 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 goto drop;
689 }
690 tunnel->i_seqno = seqno + 1;
691 }
Herbert Xue1a80002008-10-09 12:00:17 -0700692
693 /* Warning: All skb pointers will be invalidated! */
694 if (tunnel->dev->type == ARPHRD_ETHER) {
695 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000696 tunnel->dev->stats.rx_length_errors++;
697 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700698 goto drop;
699 }
700
701 iph = ip_hdr(skb);
702 skb->protocol = eth_type_trans(skb, tunnel->dev);
703 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
704 }
705
Eric Dumazete985aad2010-09-27 03:57:11 +0000706 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000707 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000708 tstats->rx_packets++;
709 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000710 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000711
712 __skb_tunnel_rx(skb, tunnel->dev);
Herbert Xue1a80002008-10-09 12:00:17 -0700713
714 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700716
Eric Dumazetcaf586e2010-09-30 21:06:55 +0000717 netif_rx(skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000718
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000719 rcu_read_unlock();
Eric Dumazet8990f462010-09-20 00:12:11 +0000720 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724drop:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000725 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726drop_nolock:
727 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000728 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729}
730
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000731static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732{
Patrick McHardy2941a482006-01-08 22:05:26 -0800733 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazete985aad2010-09-27 03:57:11 +0000734 struct pcpu_tstats *tstats;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000735 const struct iphdr *old_iph = ip_hdr(skb);
736 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700737 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800739 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000741 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700743 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800745 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 int mtu;
747
Eric Dumazet6b78f162012-09-13 21:25:33 +0000748 if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 skb_checksum_help(skb))
750 goto tx_error;
751
Herbert Xue1a80002008-10-09 12:00:17 -0700752 if (dev->type == ARPHRD_ETHER)
753 IPCB(skb)->flags = 0;
754
755 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000757 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 } else {
759 gre_hlen = tunnel->hlen;
760 tiph = &tunnel->parms.iph;
761 }
762
763 if ((dst = tiph->daddr) == 0) {
764 /* NBMA tunnel */
765
Eric Dumazetadf30902009-06-02 05:19:30 +0000766 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000767 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto tx_error;
769 }
770
David S. Miller61d57f82012-01-24 18:23:30 -0500771 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000772 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700773 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500774 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000775#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000777 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800778 struct neighbour *neigh;
779 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781
David S. Miller0ec88662012-01-27 15:01:08 -0800782 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 if (neigh == NULL)
784 goto tx_error;
785
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000786 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 addr_type = ipv6_addr_type(addr6);
788
789 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700790 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 addr_type = ipv6_addr_type(addr6);
792 }
793
794 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800795 do_tx_error_icmp = true;
796 else {
797 do_tx_error_icmp = false;
798 dst = addr6->s6_addr32[3];
799 }
800 neigh_release(neigh);
801 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 }
804#endif
805 else
806 goto tx_error;
807 }
808
809 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700810 if (tos == 1) {
811 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 if (skb->protocol == htons(ETH_P_IP))
813 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700814 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000815 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 }
817
David S. Millercbb1e852011-05-04 12:33:34 -0700818 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500819 tunnel->parms.o_key, RT_TOS(tos),
820 tunnel->parms.link);
821 if (IS_ERR(rt)) {
822 dev->stats.tx_carrier_errors++;
823 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700825 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826
827 if (tdev == dev) {
828 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000829 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 goto tx_error;
831 }
832
833 df = tiph->frag_off;
834 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700835 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000837 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838
Eric Dumazetadf30902009-06-02 05:19:30 +0000839 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700840 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841
842 if (skb->protocol == htons(ETH_P_IP)) {
843 df |= (old_iph->frag_off&htons(IP_DF));
844
845 if ((old_iph->frag_off&htons(IP_DF)) &&
846 mtu < ntohs(old_iph->tot_len)) {
847 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
848 ip_rt_put(rt);
849 goto tx_error;
850 }
851 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000852#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000854 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855
Eric Dumazetadf30902009-06-02 05:19:30 +0000856 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800857 if ((tunnel->parms.iph.daddr &&
858 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 rt6->rt6i_dst.plen == 128) {
860 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800861 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 }
863 }
864
865 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000866 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 ip_rt_put(rt);
868 goto tx_error;
869 }
870 }
871#endif
872
873 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800874 if (time_before(jiffies,
875 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 tunnel->err_count--;
877
878 dst_link_failure(skb);
879 } else
880 tunnel->err_count = 0;
881 }
882
Changli Gaod8d1f302010-06-10 23:31:35 -0700883 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
Patrick McHardycfbba492007-07-09 15:33:40 -0700885 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
886 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000888 if (max_headroom > dev->needed_headroom)
889 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 if (!new_skb) {
891 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000892 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000894 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 }
896 if (skb->sk)
897 skb_set_owner_w(new_skb, skb->sk);
898 dev_kfree_skb(skb);
899 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700900 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 }
902
Herbert Xu64194c32008-10-09 12:03:17 -0700903 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700904 skb_push(skb, gre_hlen);
905 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800907 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
908 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000909 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700910 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 /*
913 * Push down and install the IPIP header.
914 */
915
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700916 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 iph->version = 4;
918 iph->ihl = sizeof(struct iphdr) >> 2;
919 iph->frag_off = df;
920 iph->protocol = IPPROTO_GRE;
921 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700922 iph->daddr = fl4.daddr;
923 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924
925 if ((iph->ttl = tiph->ttl) == 0) {
926 if (skb->protocol == htons(ETH_P_IP))
927 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000928#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000930 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931#endif
932 else
David S. Miller323e1262010-12-12 21:55:08 -0800933 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 }
935
Herbert Xue1a80002008-10-09 12:00:17 -0700936 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
937 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
938 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939
940 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000941 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942
943 if (tunnel->parms.o_flags&GRE_SEQ) {
944 ++tunnel->o_seqno;
945 *ptr = htonl(tunnel->o_seqno);
946 ptr--;
947 }
948 if (tunnel->parms.o_flags&GRE_KEY) {
949 *ptr = tunnel->parms.o_key;
950 ptr--;
951 }
952 if (tunnel->parms.o_flags&GRE_CSUM) {
953 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000954 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 }
956 }
957
958 nf_reset(skb);
Eric Dumazete985aad2010-09-27 03:57:11 +0000959 tstats = this_cpu_ptr(dev->tstats);
960 __IPTUNNEL_XMIT(tstats, &dev->stats);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000961 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962
David S. Miller496053f2012-01-11 16:46:32 -0800963#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964tx_error_icmp:
965 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800966#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000968 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000970 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971}
972
Herbert Xu42aa9162008-10-09 11:59:32 -0700973static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800974{
975 struct net_device *tdev = NULL;
976 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000977 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800978 int hlen = LL_MAX_HEADER;
979 int mtu = ETH_DATA_LEN;
980 int addend = sizeof(struct iphdr) + 4;
981
982 tunnel = netdev_priv(dev);
983 iph = &tunnel->parms.iph;
984
Herbert Xuc95b8192008-10-09 11:58:54 -0700985 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800986
987 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -0700988 struct flowi4 fl4;
989 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +0000990
David S. Millercbb1e852011-05-04 12:33:34 -0700991 rt = ip_route_output_gre(dev_net(dev), &fl4,
992 iph->daddr, iph->saddr,
993 tunnel->parms.o_key,
994 RT_TOS(iph->tos),
995 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800996 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700997 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800998 ip_rt_put(rt);
999 }
Herbert Xue1a80002008-10-09 12:00:17 -07001000
1001 if (dev->type != ARPHRD_ETHER)
1002 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001003 }
1004
1005 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001006 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001007
1008 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001009 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001010 mtu = tdev->mtu;
1011 }
1012 dev->iflink = tunnel->parms.link;
1013
1014 /* Precalculate GRE options length */
1015 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1016 if (tunnel->parms.o_flags&GRE_CSUM)
1017 addend += 4;
1018 if (tunnel->parms.o_flags&GRE_KEY)
1019 addend += 4;
1020 if (tunnel->parms.o_flags&GRE_SEQ)
1021 addend += 4;
1022 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001023 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001024 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001025
1026 if (mtu < 68)
1027 mtu = 68;
1028
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001029 tunnel->hlen = addend;
1030
Herbert Xu42aa9162008-10-09 11:59:32 -07001031 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001032}
1033
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034static int
1035ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1036{
1037 int err = 0;
1038 struct ip_tunnel_parm p;
1039 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001040 struct net *net = dev_net(dev);
1041 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042
1043 switch (cmd) {
1044 case SIOCGETTUNNEL:
1045 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001046 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1048 err = -EFAULT;
1049 break;
1050 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001051 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 }
1053 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001054 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 memcpy(&p, &t->parms, sizeof(p));
1056 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1057 err = -EFAULT;
1058 break;
1059
1060 case SIOCADDTUNNEL:
1061 case SIOCCHGTUNNEL:
1062 err = -EPERM;
1063 if (!capable(CAP_NET_ADMIN))
1064 goto done;
1065
1066 err = -EFAULT;
1067 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1068 goto done;
1069
1070 err = -EINVAL;
1071 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1072 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1073 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1074 goto done;
1075 if (p.iph.ttl)
1076 p.iph.frag_off |= htons(IP_DF);
1077
1078 if (!(p.i_flags&GRE_KEY))
1079 p.i_key = 0;
1080 if (!(p.o_flags&GRE_KEY))
1081 p.o_key = 0;
1082
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001083 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001085 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 if (t != NULL) {
1087 if (t->dev != dev) {
1088 err = -EEXIST;
1089 break;
1090 }
1091 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001092 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093
Patrick McHardy2941a482006-01-08 22:05:26 -08001094 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095
Joe Perchesf97c1e02007-12-16 13:45:43 -08001096 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 nflags = IFF_BROADCAST;
1098 else if (p.iph.daddr)
1099 nflags = IFF_POINTOPOINT;
1100
1101 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1102 err = -EINVAL;
1103 break;
1104 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001105 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001106 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 t->parms.iph.saddr = p.iph.saddr;
1108 t->parms.iph.daddr = p.iph.daddr;
1109 t->parms.i_key = p.i_key;
1110 t->parms.o_key = p.o_key;
1111 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1112 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001113 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 netdev_state_change(dev);
1115 }
1116 }
1117
1118 if (t) {
1119 err = 0;
1120 if (cmd == SIOCCHGTUNNEL) {
1121 t->parms.iph.ttl = p.iph.ttl;
1122 t->parms.iph.tos = p.iph.tos;
1123 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001124 if (t->parms.link != p.link) {
1125 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001126 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001127 netdev_state_change(dev);
1128 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 }
1130 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1131 err = -EFAULT;
1132 } else
1133 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1134 break;
1135
1136 case SIOCDELTUNNEL:
1137 err = -EPERM;
1138 if (!capable(CAP_NET_ADMIN))
1139 goto done;
1140
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001141 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 err = -EFAULT;
1143 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1144 goto done;
1145 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001146 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 goto done;
1148 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001149 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 goto done;
1151 dev = t->dev;
1152 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001153 unregister_netdevice(dev);
1154 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 break;
1156
1157 default:
1158 err = -EINVAL;
1159 }
1160
1161done:
1162 return err;
1163}
1164
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1166{
Patrick McHardy2941a482006-01-08 22:05:26 -08001167 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001168 if (new_mtu < 68 ||
1169 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 return -EINVAL;
1171 dev->mtu = new_mtu;
1172 return 0;
1173}
1174
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001184
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1186 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1187
1188 ping -t 255 224.66.66.66
1189
1190 If nobody answers, mbone does not work.
1191
1192 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1193 ip addr add 10.66.66.<somewhat>/24 dev Universe
1194 ifconfig Universe up
1195 ifconfig Universe add fe80::<Your_real_addr>/10
1196 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1197 ftp 10.66.66.66
1198 ...
1199 ftp fec0:6666:6666::193.233.7.65
1200 ...
1201
1202 */
1203
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001204static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1205 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001206 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207{
Patrick McHardy2941a482006-01-08 22:05:26 -08001208 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001210 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211
1212 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1213 p[0] = t->parms.o_flags;
1214 p[1] = htons(type);
1215
1216 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001217 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001219
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 if (saddr)
1221 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001222 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001224 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001226
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 return -t->hlen;
1228}
1229
Timo Teras6a5f44d2007-10-23 20:31:53 -07001230static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1231{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001232 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001233 memcpy(haddr, &iph->saddr, 4);
1234 return 4;
1235}
1236
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001237static const struct header_ops ipgre_header_ops = {
1238 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001239 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001240};
1241
Timo Teras6a5f44d2007-10-23 20:31:53 -07001242#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243static int ipgre_open(struct net_device *dev)
1244{
Patrick McHardy2941a482006-01-08 22:05:26 -08001245 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
Joe Perchesf97c1e02007-12-16 13:45:43 -08001247 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001248 struct flowi4 fl4;
1249 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001250
David S. Millercbb1e852011-05-04 12:33:34 -07001251 rt = ip_route_output_gre(dev_net(dev), &fl4,
1252 t->parms.iph.daddr,
1253 t->parms.iph.saddr,
1254 t->parms.o_key,
1255 RT_TOS(t->parms.iph.tos),
1256 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001257 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001259 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001261 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 return -EADDRNOTAVAIL;
1263 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001264 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 }
1266 return 0;
1267}
1268
1269static int ipgre_close(struct net_device *dev)
1270{
Patrick McHardy2941a482006-01-08 22:05:26 -08001271 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001272
Joe Perchesf97c1e02007-12-16 13:45:43 -08001273 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001274 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001275 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001276 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 }
1279 return 0;
1280}
1281
1282#endif
1283
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001284static const struct net_device_ops ipgre_netdev_ops = {
1285 .ndo_init = ipgre_tunnel_init,
1286 .ndo_uninit = ipgre_tunnel_uninit,
1287#ifdef CONFIG_NET_IPGRE_BROADCAST
1288 .ndo_open = ipgre_open,
1289 .ndo_stop = ipgre_close,
1290#endif
1291 .ndo_start_xmit = ipgre_tunnel_xmit,
1292 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1293 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001294 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001295};
1296
Eric Dumazete985aad2010-09-27 03:57:11 +00001297static void ipgre_dev_free(struct net_device *dev)
1298{
1299 free_percpu(dev->tstats);
1300 free_netdev(dev);
1301}
1302
/* Offload feature bits set in both dev->features and dev->hw_features. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1307
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308static void ipgre_tunnel_setup(struct net_device *dev)
1309{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001310 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001311 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
1313 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001314 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001315 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 dev->flags = IFF_NOARP;
1317 dev->iflink = 0;
1318 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001319 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001320 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001321
1322 dev->features |= GRE_FEATURES;
1323 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324}
1325
1326static int ipgre_tunnel_init(struct net_device *dev)
1327{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 struct ip_tunnel *tunnel;
1329 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Patrick McHardy2941a482006-01-08 22:05:26 -08001331 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 iph = &tunnel->parms.iph;
1333
1334 tunnel->dev = dev;
1335 strcpy(tunnel->parms.name, dev->name);
1336
1337 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1338 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1339
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001342 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 if (!iph->saddr)
1344 return -EINVAL;
1345 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001346 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 }
1348#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001349 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001350 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Eric Dumazete985aad2010-09-27 03:57:11 +00001352 dev->tstats = alloc_percpu(struct pcpu_tstats);
1353 if (!dev->tstats)
1354 return -ENOMEM;
1355
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 return 0;
1357}
1358
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001359static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360{
Patrick McHardy2941a482006-01-08 22:05:26 -08001361 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 struct iphdr *iph = &tunnel->parms.iph;
1363
1364 tunnel->dev = dev;
1365 strcpy(tunnel->parms.name, dev->name);
1366
1367 iph->version = 4;
1368 iph->protocol = IPPROTO_GRE;
1369 iph->ihl = 5;
1370 tunnel->hlen = sizeof(struct iphdr) + 4;
1371
1372 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373}
1374
1375
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001376static const struct gre_protocol ipgre_protocol = {
1377 .handler = ipgre_rcv,
1378 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379};
1380
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001381static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001382{
1383 int prio;
1384
1385 for (prio = 0; prio < 4; prio++) {
1386 int h;
1387 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001388 struct ip_tunnel *t;
1389
1390 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001391
1392 while (t != NULL) {
1393 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001394 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001395 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001396 }
1397 }
1398}
1399
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001400static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001401{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001402 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001403 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001404
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001405 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1406 ipgre_tunnel_setup);
1407 if (!ign->fb_tunnel_dev) {
1408 err = -ENOMEM;
1409 goto err_alloc_dev;
1410 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001411 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001412
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001413 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001414 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001415
1416 if ((err = register_netdev(ign->fb_tunnel_dev)))
1417 goto err_reg_dev;
1418
Eric Dumazet3285ee32010-10-30 16:21:28 -07001419 rcu_assign_pointer(ign->tunnels_wc[0],
1420 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001421 return 0;
1422
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001423err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001424 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001425err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001426 return err;
1427}
1428
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001429static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001430{
1431 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001432 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001433
1434 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001435 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001436 ipgre_destroy_tunnels(ign, &list);
1437 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001438 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001439}
1440
1441static struct pernet_operations ipgre_net_ops = {
1442 .init = ipgre_init_net,
1443 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001444 .id = &ipgre_net_id,
1445 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001446};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447
Herbert Xuc19e6542008-10-09 11:59:55 -07001448static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1449{
1450 __be16 flags;
1451
1452 if (!data)
1453 return 0;
1454
1455 flags = 0;
1456 if (data[IFLA_GRE_IFLAGS])
1457 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1458 if (data[IFLA_GRE_OFLAGS])
1459 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1460 if (flags & (GRE_VERSION|GRE_ROUTING))
1461 return -EINVAL;
1462
1463 return 0;
1464}
1465
Herbert Xue1a80002008-10-09 12:00:17 -07001466static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1467{
1468 __be32 daddr;
1469
1470 if (tb[IFLA_ADDRESS]) {
1471 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1472 return -EINVAL;
1473 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1474 return -EADDRNOTAVAIL;
1475 }
1476
1477 if (!data)
1478 goto out;
1479
1480 if (data[IFLA_GRE_REMOTE]) {
1481 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1482 if (!daddr)
1483 return -EINVAL;
1484 }
1485
1486out:
1487 return ipgre_tunnel_validate(tb, data);
1488}
1489
Herbert Xuc19e6542008-10-09 11:59:55 -07001490static void ipgre_netlink_parms(struct nlattr *data[],
1491 struct ip_tunnel_parm *parms)
1492{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001493 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001494
1495 parms->iph.protocol = IPPROTO_GRE;
1496
1497 if (!data)
1498 return;
1499
1500 if (data[IFLA_GRE_LINK])
1501 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1502
1503 if (data[IFLA_GRE_IFLAGS])
1504 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1505
1506 if (data[IFLA_GRE_OFLAGS])
1507 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1508
1509 if (data[IFLA_GRE_IKEY])
1510 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1511
1512 if (data[IFLA_GRE_OKEY])
1513 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1514
1515 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001516 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001517
1518 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001519 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001520
1521 if (data[IFLA_GRE_TTL])
1522 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1523
1524 if (data[IFLA_GRE_TOS])
1525 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1526
1527 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1528 parms->iph.frag_off = htons(IP_DF);
1529}
1530
Herbert Xue1a80002008-10-09 12:00:17 -07001531static int ipgre_tap_init(struct net_device *dev)
1532{
1533 struct ip_tunnel *tunnel;
1534
1535 tunnel = netdev_priv(dev);
1536
1537 tunnel->dev = dev;
1538 strcpy(tunnel->parms.name, dev->name);
1539
1540 ipgre_tunnel_bind_dev(dev);
1541
Eric Dumazete985aad2010-09-27 03:57:11 +00001542 dev->tstats = alloc_percpu(struct pcpu_tstats);
1543 if (!dev->tstats)
1544 return -ENOMEM;
1545
Herbert Xue1a80002008-10-09 12:00:17 -07001546 return 0;
1547}
1548
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001549static const struct net_device_ops ipgre_tap_netdev_ops = {
1550 .ndo_init = ipgre_tap_init,
1551 .ndo_uninit = ipgre_tunnel_uninit,
1552 .ndo_start_xmit = ipgre_tunnel_xmit,
1553 .ndo_set_mac_address = eth_mac_addr,
1554 .ndo_validate_addr = eth_validate_addr,
1555 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001556 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001557};
1558
Herbert Xue1a80002008-10-09 12:00:17 -07001559static void ipgre_tap_setup(struct net_device *dev)
1560{
1561
1562 ether_setup(dev);
1563
Herbert Xu2e9526b2009-10-30 05:51:48 +00001564 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001565 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001566
1567 dev->iflink = 0;
1568 dev->features |= NETIF_F_NETNS_LOCAL;
1569}
1570
Eric W. Biederman81adee42009-11-08 00:53:51 -08001571static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001572 struct nlattr *data[])
1573{
1574 struct ip_tunnel *nt;
1575 struct net *net = dev_net(dev);
1576 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1577 int mtu;
1578 int err;
1579
1580 nt = netdev_priv(dev);
1581 ipgre_netlink_parms(data, &nt->parms);
1582
Herbert Xue1a80002008-10-09 12:00:17 -07001583 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001584 return -EEXIST;
1585
Herbert Xue1a80002008-10-09 12:00:17 -07001586 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001587 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001588
Herbert Xuc19e6542008-10-09 11:59:55 -07001589 mtu = ipgre_tunnel_bind_dev(dev);
1590 if (!tb[IFLA_MTU])
1591 dev->mtu = mtu;
1592
Eric Dumazetb790e012010-09-27 23:05:47 +00001593 /* Can use a lockless transmit, unless we generate output sequences */
1594 if (!(nt->parms.o_flags & GRE_SEQ))
1595 dev->features |= NETIF_F_LLTX;
1596
Herbert Xuc19e6542008-10-09 11:59:55 -07001597 err = register_netdevice(dev);
1598 if (err)
1599 goto out;
1600
1601 dev_hold(dev);
1602 ipgre_tunnel_link(ign, nt);
1603
1604out:
1605 return err;
1606}
1607
1608static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1609 struct nlattr *data[])
1610{
1611 struct ip_tunnel *t, *nt;
1612 struct net *net = dev_net(dev);
1613 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1614 struct ip_tunnel_parm p;
1615 int mtu;
1616
1617 if (dev == ign->fb_tunnel_dev)
1618 return -EINVAL;
1619
1620 nt = netdev_priv(dev);
1621 ipgre_netlink_parms(data, &p);
1622
1623 t = ipgre_tunnel_locate(net, &p, 0);
1624
1625 if (t) {
1626 if (t->dev != dev)
1627 return -EEXIST;
1628 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001629 t = nt;
1630
Herbert Xu2e9526b2009-10-30 05:51:48 +00001631 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001632 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001633
Herbert Xu2e9526b2009-10-30 05:51:48 +00001634 if (ipv4_is_multicast(p.iph.daddr))
1635 nflags = IFF_BROADCAST;
1636 else if (p.iph.daddr)
1637 nflags = IFF_POINTOPOINT;
1638
1639 if ((dev->flags ^ nflags) &
1640 (IFF_POINTOPOINT | IFF_BROADCAST))
1641 return -EINVAL;
1642 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001643
1644 ipgre_tunnel_unlink(ign, t);
1645 t->parms.iph.saddr = p.iph.saddr;
1646 t->parms.iph.daddr = p.iph.daddr;
1647 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001648 if (dev->type != ARPHRD_ETHER) {
1649 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1650 memcpy(dev->broadcast, &p.iph.daddr, 4);
1651 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001652 ipgre_tunnel_link(ign, t);
1653 netdev_state_change(dev);
1654 }
1655
1656 t->parms.o_key = p.o_key;
1657 t->parms.iph.ttl = p.iph.ttl;
1658 t->parms.iph.tos = p.iph.tos;
1659 t->parms.iph.frag_off = p.iph.frag_off;
1660
1661 if (t->parms.link != p.link) {
1662 t->parms.link = p.link;
1663 mtu = ipgre_tunnel_bind_dev(dev);
1664 if (!tb[IFLA_MTU])
1665 dev->mtu = mtu;
1666 netdev_state_change(dev);
1667 }
1668
1669 return 0;
1670}
1671
1672static size_t ipgre_get_size(const struct net_device *dev)
1673{
1674 return
1675 /* IFLA_GRE_LINK */
1676 nla_total_size(4) +
1677 /* IFLA_GRE_IFLAGS */
1678 nla_total_size(2) +
1679 /* IFLA_GRE_OFLAGS */
1680 nla_total_size(2) +
1681 /* IFLA_GRE_IKEY */
1682 nla_total_size(4) +
1683 /* IFLA_GRE_OKEY */
1684 nla_total_size(4) +
1685 /* IFLA_GRE_LOCAL */
1686 nla_total_size(4) +
1687 /* IFLA_GRE_REMOTE */
1688 nla_total_size(4) +
1689 /* IFLA_GRE_TTL */
1690 nla_total_size(1) +
1691 /* IFLA_GRE_TOS */
1692 nla_total_size(1) +
1693 /* IFLA_GRE_PMTUDISC */
1694 nla_total_size(1) +
1695 0;
1696}
1697
1698static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1699{
1700 struct ip_tunnel *t = netdev_priv(dev);
1701 struct ip_tunnel_parm *p = &t->parms;
1702
David S. Millerf3756b72012-04-01 20:39:02 -04001703 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1704 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1705 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1706 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1707 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1708 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1709 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1710 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1711 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1712 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1713 !!(p->iph.frag_off & htons(IP_DF))))
1714 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001715 return 0;
1716
1717nla_put_failure:
1718 return -EMSGSIZE;
1719}
1720
1721static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1722 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1723 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1724 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1725 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1726 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001727 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1728 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001729 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1730 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1731 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1732};
1733
1734static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1735 .kind = "gre",
1736 .maxtype = IFLA_GRE_MAX,
1737 .policy = ipgre_policy,
1738 .priv_size = sizeof(struct ip_tunnel),
1739 .setup = ipgre_tunnel_setup,
1740 .validate = ipgre_tunnel_validate,
1741 .newlink = ipgre_newlink,
1742 .changelink = ipgre_changelink,
1743 .get_size = ipgre_get_size,
1744 .fill_info = ipgre_fill_info,
1745};
1746
Herbert Xue1a80002008-10-09 12:00:17 -07001747static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1748 .kind = "gretap",
1749 .maxtype = IFLA_GRE_MAX,
1750 .policy = ipgre_policy,
1751 .priv_size = sizeof(struct ip_tunnel),
1752 .setup = ipgre_tap_setup,
1753 .validate = ipgre_tap_validate,
1754 .newlink = ipgre_newlink,
1755 .changelink = ipgre_changelink,
1756 .get_size = ipgre_get_size,
1757 .fill_info = ipgre_fill_info,
1758};
1759
/*
 *	And now the module code and kernel interface.
 */
1763
1764static int __init ipgre_init(void)
1765{
1766 int err;
1767
Joe Perches058bd4d2012-03-11 18:36:11 +00001768 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001770 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001771 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001772 return err;
1773
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001774 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001775 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001776 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001777 goto add_proto_failed;
1778 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001779
Herbert Xuc19e6542008-10-09 11:59:55 -07001780 err = rtnl_link_register(&ipgre_link_ops);
1781 if (err < 0)
1782 goto rtnl_link_failed;
1783
Herbert Xue1a80002008-10-09 12:00:17 -07001784 err = rtnl_link_register(&ipgre_tap_ops);
1785 if (err < 0)
1786 goto tap_ops_failed;
1787
Herbert Xuc19e6542008-10-09 11:59:55 -07001788out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001790
Herbert Xue1a80002008-10-09 12:00:17 -07001791tap_ops_failed:
1792 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001793rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001794 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001795add_proto_failed:
1796 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001797 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798}
1799
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001800static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801{
Herbert Xue1a80002008-10-09 12:00:17 -07001802 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001803 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001804 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001805 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001806 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807}
1808
1809module_init(ipgre_init);
1810module_exit(ipgre_fini);
1811MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001812MODULE_ALIAS_RTNL_LINK("gre");
1813MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9a2011-03-02 00:33:13 +03001814MODULE_ALIAS_NETDEV("gre0");