blob: 594cec35ac4df198b4afb956104c1e5120c8c0ed [file] [log] [blame]
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in this case,
   if we copy it from the packet being encapsulated to the upper header.
   It is a very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   a fatal route to the network, even if it were you who configured
   a fatal static route: you are innocent. :-)
110
111
112
   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
122
Herbert Xuc19e6542008-10-09 11:59:55 -0700123static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ipgre_tunnel_init(struct net_device *dev);
125static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700126static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127
128/* Fallback tunnel: no source, no destination, no key, no options */
129
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130#define HASH_SIZE 16
131
Eric Dumazetf99189b2009-11-17 10:42:49 +0000132static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700136 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137};
138
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
 */
156
/*
 * Fold an address or key into a bucket index in the range 0..15: the value
 * is XORed with itself shifted right by four bits and the low nibble is kept.
 * The argument is fully parenthesized so that an expression argument such as
 * HASH(a ^ b) is cast and shifted as a whole.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

/* Shorthand names for the four hash tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk the chain that begins at @start using rcu_dereference().
 * The macro assigns to a variable named 't', which the caller must have
 * declared as a struct ip_tunnel pointer.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Eric Dumazete985aad2010-09-27 03:57:11 +0000170/* often modified stats are per cpu, other are shared (netdev->stats) */
171struct pcpu_tstats {
stephen hemminger87b6d212012-04-12 06:31:16 +0000172 u64 rx_packets;
173 u64 rx_bytes;
174 u64 tx_packets;
175 u64 tx_bytes;
176 struct u64_stats_sync syncp;
177};
Eric Dumazete985aad2010-09-27 03:57:11 +0000178
stephen hemminger87b6d212012-04-12 06:31:16 +0000179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000181{
Eric Dumazete985aad2010-09-27 03:57:11 +0000182 int i;
183
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
187 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000188
stephen hemminger87b6d212012-04-12 06:31:16 +0000189 do {
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
196
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000201 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000202
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
213
214 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000215}
216
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217/* Given src, dst and key, find appropriate for input tunnel. */
218
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000219static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
220 __be32 remote, __be32 local,
221 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222{
Timo Teras749c10f2009-01-19 17:22:12 -0800223 struct net *net = dev_net(dev);
224 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000225 unsigned int h0 = HASH(remote);
226 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800227 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700228 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700229 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
230 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800231 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000233 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800234 if (local != t->parms.iph.saddr ||
235 remote != t->parms.iph.daddr ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
Timo Terasafcf1242009-01-26 20:56:10 -0800244 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800245 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800246 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800247 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800248 score |= 2;
249 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800250 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 }
Herbert Xue1a80002008-10-09 12:00:17 -0700257
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000258 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800259 if (remote != t->parms.iph.daddr ||
260 key != t->parms.i_key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
266 continue;
267
Timo Terasafcf1242009-01-26 20:56:10 -0800268 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800269 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800270 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800271 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800272 score |= 2;
273 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800274 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
Herbert Xue1a80002008-10-09 12:00:17 -0700281
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000282 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800283 if ((local != t->parms.iph.saddr &&
284 (local != t->parms.iph.daddr ||
285 !ipv4_is_multicast(local))) ||
286 key != t->parms.i_key ||
287 !(t->dev->flags & IFF_UP))
288 continue;
289
290 if (t->dev->type != ARPHRD_IPGRE &&
291 t->dev->type != dev_type)
292 continue;
293
Timo Terasafcf1242009-01-26 20:56:10 -0800294 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800295 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800296 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800297 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800298 score |= 2;
299 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800300 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800301
302 if (score < cand_score) {
303 cand = t;
304 cand_score = score;
305 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 }
Herbert Xue1a80002008-10-09 12:00:17 -0700307
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000308 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800309 if (t->parms.i_key != key ||
310 !(t->dev->flags & IFF_UP))
311 continue;
312
313 if (t->dev->type != ARPHRD_IPGRE &&
314 t->dev->type != dev_type)
315 continue;
316
Timo Terasafcf1242009-01-26 20:56:10 -0800317 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800318 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800319 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800320 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800321 score |= 2;
322 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800323 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800324
325 if (score < cand_score) {
326 cand = t;
327 cand_score = score;
328 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 }
330
Timo Terasafcf1242009-01-26 20:56:10 -0800331 if (cand != NULL)
332 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700333
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000334 dev = ign->fb_tunnel_dev;
335 if (dev->flags & IFF_UP)
336 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800337
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 return NULL;
339}
340
Eric Dumazet15078502010-09-15 11:07:53 +0000341static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700342 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000347 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 int prio = 0;
349
350 if (local)
351 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800352 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 prio |= 2;
354 h ^= HASH(remote);
355 }
356
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700357 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358}
359
Eric Dumazet15078502010-09-15 11:07:53 +0000360static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700361 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900362{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700363 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900364}
365
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700366static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367{
Eric Dumazet15078502010-09-15 11:07:53 +0000368 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
Eric Dumazet15078502010-09-15 11:07:53 +0000370 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000371 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372}
373
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700374static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375{
Eric Dumazet15078502010-09-15 11:07:53 +0000376 struct ip_tunnel __rcu **tp;
377 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
Eric Dumazet15078502010-09-15 11:07:53 +0000379 for (tp = ipgre_bucket(ign, t);
380 (iter = rtnl_dereference(*tp)) != NULL;
381 tp = &iter->next) {
382 if (t == iter) {
383 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 break;
385 }
386 }
387}
388
Herbert Xue1a80002008-10-09 12:00:17 -0700389static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
390 struct ip_tunnel_parm *parms,
391 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392{
Al Virod5a0a1e2006-11-08 00:23:14 -0800393 __be32 remote = parms->iph.daddr;
394 __be32 local = parms->iph.saddr;
395 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800396 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000397 struct ip_tunnel *t;
398 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700399 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
400
Eric Dumazet15078502010-09-15 11:07:53 +0000401 for (tp = __ipgre_bucket(ign, parms);
402 (t = rtnl_dereference(*tp)) != NULL;
403 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700404 if (local == t->parms.iph.saddr &&
405 remote == t->parms.iph.daddr &&
406 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800407 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700408 type == t->dev->type)
409 break;
410
411 return t;
412}
413
Eric Dumazet15078502010-09-15 11:07:53 +0000414static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700415 struct ip_tunnel_parm *parms, int create)
416{
417 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
Herbert Xue1a80002008-10-09 12:00:17 -0700422 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
423 if (t || !create)
424 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
426 if (parms->name[0])
427 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800428 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000429 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
431 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
432 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000433 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700435 dev_net_set(dev, net);
436
Patrick McHardy2941a482006-01-08 22:05:26 -0800437 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700439 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
Herbert Xu42aa9162008-10-09 11:59:32 -0700441 dev->mtu = ipgre_tunnel_bind_dev(dev);
442
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800443 if (register_netdevice(dev) < 0)
444 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000446 /* Can use a lockless transmit, unless we generate output sequences */
447 if (!(nt->parms.o_flags & GRE_SEQ))
448 dev->features |= NETIF_F_LLTX;
449
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700451 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 return nt;
453
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800454failed_free:
455 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 return NULL;
457}
458
459static void ipgre_tunnel_uninit(struct net_device *dev)
460{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700461 struct net *net = dev_net(dev);
462 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
463
464 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 dev_put(dev);
466}
467
468
469static void ipgre_err(struct sk_buff *skb, u32 info)
470{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471
Rami Rosen071f92d2008-05-21 17:47:54 -0700472/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 8 bytes of packet payload. It means, that precise relaying of
474 ICMP in the real Internet is absolutely infeasible.
475
476 Moreover, Cisco "wise men" put GRE key to the third word
477 in GRE header. It makes impossible maintaining even soft state for keyed
478 GRE tunnels with enabled checksum. Tell them "thank you".
479
480 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000481 what the hell these idiots break standards established
482 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 */
484
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000485 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000486 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300488 const int type = icmp_hdr(skb)->type;
489 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800491 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
493 flags = p[0];
494 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
495 if (flags&(GRE_VERSION|GRE_ROUTING))
496 return;
497 if (flags&GRE_KEY) {
498 grehlen += 4;
499 if (flags&GRE_CSUM)
500 grehlen += 4;
501 }
502 }
503
504 /* If only 8 bytes returned, keyed message will be dropped here */
505 if (skb_headlen(skb) < grehlen)
506 return;
507
508 switch (type) {
509 default:
510 case ICMP_PARAMETERPROB:
511 return;
512
513 case ICMP_DEST_UNREACH:
514 switch (code) {
515 case ICMP_SR_FAILED:
516 case ICMP_PORT_UNREACH:
517 /* Impossible event. */
518 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 default:
520 /* All others are translated to HOST_UNREACH.
521 rfc2003 contains "deep thoughts" about NET_UNREACH,
522 I believe they are just ether pollution. --ANK
523 */
524 break;
525 }
526 break;
527 case ICMP_TIME_EXCEEDED:
528 if (code != ICMP_EXC_TTL)
529 return;
530 break;
531 }
532
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000533 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800534 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700535 flags & GRE_KEY ?
536 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
537 p[1]);
David S. Miller36393392012-06-14 22:21:46 -0700538 if (t == NULL)
539 goto out;
540
541 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
542 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
543 t->parms.link, 0, IPPROTO_GRE, 0);
544 goto out;
545 }
546
547 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800548 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 goto out;
550
551 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
552 goto out;
553
Wei Yongjunda6185d82009-02-24 23:34:48 -0800554 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 t->err_count++;
556 else
557 t->err_count = 1;
558 t->err_time = jiffies;
559out:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000560 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561}
562
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000563static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564{
565 if (INET_ECN_is_ce(iph->tos)) {
566 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700567 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700569 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 }
571 }
572}
573
574static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000575ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576{
577 u8 inner = 0;
578 if (skb->protocol == htons(ETH_P_IP))
579 inner = old_iph->tos;
580 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000581 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 return INET_ECN_encapsulate(tos, inner);
583}
584
585static int ipgre_rcv(struct sk_buff *skb)
586{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000587 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800589 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800590 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800591 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 u32 seqno = 0;
593 struct ip_tunnel *tunnel;
594 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700595 __be16 gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
597 if (!pskb_may_pull(skb, 16))
598 goto drop_nolock;
599
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700600 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000602 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
604 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
605 /* - Version must be 0.
606 - We do not support routing headers.
607 */
608 if (flags&(GRE_VERSION|GRE_ROUTING))
609 goto drop_nolock;
610
611 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800612 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700613 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800614 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800615 if (!csum)
616 break;
617 /* fall through */
618 case CHECKSUM_NONE:
619 skb->csum = 0;
620 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700621 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 }
623 offset += 4;
624 }
625 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000626 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 offset += 4;
628 }
629 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000630 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 offset += 4;
632 }
633 }
634
Herbert Xue1a80002008-10-09 12:00:17 -0700635 gre_proto = *(__be16 *)(h + 2);
636
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000637 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800638 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700639 iph->saddr, iph->daddr, key,
640 gre_proto))) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000641 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700642
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 secpath_reset(skb);
644
Herbert Xue1a80002008-10-09 12:00:17 -0700645 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 /* WCCP version 1 and 2 protocol decoding.
647 * - Change protocol to IP
648 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
649 */
Herbert Xue1a80002008-10-09 12:00:17 -0700650 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700651 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900652 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 offset += 4;
654 }
655
Timo Teras1d069162007-12-20 00:10:33 -0800656 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300657 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700658 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 skb->pkt_type = PACKET_HOST;
660#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800661 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800663 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000665 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 skb->pkt_type = PACKET_BROADCAST;
667 }
668#endif
669
670 if (((flags&GRE_CSUM) && csum) ||
671 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000672 tunnel->dev->stats.rx_crc_errors++;
673 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 goto drop;
675 }
676 if (tunnel->parms.i_flags&GRE_SEQ) {
677 if (!(flags&GRE_SEQ) ||
678 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000679 tunnel->dev->stats.rx_fifo_errors++;
680 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 goto drop;
682 }
683 tunnel->i_seqno = seqno + 1;
684 }
Herbert Xue1a80002008-10-09 12:00:17 -0700685
686 /* Warning: All skb pointers will be invalidated! */
687 if (tunnel->dev->type == ARPHRD_ETHER) {
688 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000689 tunnel->dev->stats.rx_length_errors++;
690 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700691 goto drop;
692 }
693
694 iph = ip_hdr(skb);
695 skb->protocol = eth_type_trans(skb, tunnel->dev);
696 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
697 }
698
Eric Dumazete985aad2010-09-27 03:57:11 +0000699 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000700 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000701 tstats->rx_packets++;
702 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000703 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000704
705 __skb_tunnel_rx(skb, tunnel->dev);
Herbert Xue1a80002008-10-09 12:00:17 -0700706
707 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700709
Eric Dumazetcaf586e2010-09-30 21:06:55 +0000710 netif_rx(skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000711
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000712 rcu_read_unlock();
Eric Dumazet8990f462010-09-20 00:12:11 +0000713 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700715 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
717drop:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000718 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719drop_nolock:
720 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000721 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722}
723
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000724static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725{
Patrick McHardy2941a482006-01-08 22:05:26 -0800726 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazete985aad2010-09-27 03:57:11 +0000727 struct pcpu_tstats *tstats;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000728 const struct iphdr *old_iph = ip_hdr(skb);
729 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700730 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800732 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000734 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700736 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800738 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 int mtu;
740
Herbert Xue1a80002008-10-09 12:00:17 -0700741 if (dev->type == ARPHRD_ETHER)
742 IPCB(skb)->flags = 0;
743
744 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000746 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 } else {
748 gre_hlen = tunnel->hlen;
749 tiph = &tunnel->parms.iph;
750 }
751
752 if ((dst = tiph->daddr) == 0) {
753 /* NBMA tunnel */
754
Eric Dumazetadf30902009-06-02 05:19:30 +0000755 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000756 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 goto tx_error;
758 }
759
David S. Miller61d57f82012-01-24 18:23:30 -0500760 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000761 rt = skb_rtable(skb);
David S. Miller61d57f82012-01-24 18:23:30 -0500762 dst = rt->rt_gateway;
763 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000764#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000766 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800767 struct neighbour *neigh;
768 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
David S. Miller0ec88662012-01-27 15:01:08 -0800771 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 if (neigh == NULL)
773 goto tx_error;
774
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000775 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 addr_type = ipv6_addr_type(addr6);
777
778 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700779 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 addr_type = ipv6_addr_type(addr6);
781 }
782
783 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800784 do_tx_error_icmp = true;
785 else {
786 do_tx_error_icmp = false;
787 dst = addr6->s6_addr32[3];
788 }
789 neigh_release(neigh);
790 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 }
793#endif
794 else
795 goto tx_error;
796 }
797
798 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700799 if (tos == 1) {
800 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 if (skb->protocol == htons(ETH_P_IP))
802 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700803 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000804 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 }
806
David S. Millercbb1e852011-05-04 12:33:34 -0700807 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500808 tunnel->parms.o_key, RT_TOS(tos),
809 tunnel->parms.link);
810 if (IS_ERR(rt)) {
811 dev->stats.tx_carrier_errors++;
812 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700814 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
816 if (tdev == dev) {
817 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000818 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 goto tx_error;
820 }
821
822 df = tiph->frag_off;
823 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700824 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000826 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827
Eric Dumazetadf30902009-06-02 05:19:30 +0000828 if (skb_dst(skb))
829 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830
831 if (skb->protocol == htons(ETH_P_IP)) {
832 df |= (old_iph->frag_off&htons(IP_DF));
833
834 if ((old_iph->frag_off&htons(IP_DF)) &&
835 mtu < ntohs(old_iph->tot_len)) {
836 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
837 ip_rt_put(rt);
838 goto tx_error;
839 }
840 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000841#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000843 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844
Eric Dumazetadf30902009-06-02 05:19:30 +0000845 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800846 if ((tunnel->parms.iph.daddr &&
847 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 rt6->rt6i_dst.plen == 128) {
849 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800850 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 }
852 }
853
854 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000855 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 ip_rt_put(rt);
857 goto tx_error;
858 }
859 }
860#endif
861
862 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800863 if (time_before(jiffies,
864 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 tunnel->err_count--;
866
867 dst_link_failure(skb);
868 } else
869 tunnel->err_count = 0;
870 }
871
Changli Gaod8d1f302010-06-10 23:31:35 -0700872 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873
Patrick McHardycfbba492007-07-09 15:33:40 -0700874 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
875 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000877 if (max_headroom > dev->needed_headroom)
878 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 if (!new_skb) {
880 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000881 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000883 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 }
885 if (skb->sk)
886 skb_set_owner_w(new_skb, skb->sk);
887 dev_kfree_skb(skb);
888 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700889 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 }
891
Herbert Xu64194c32008-10-09 12:03:17 -0700892 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700893 skb_push(skb, gre_hlen);
894 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800896 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
897 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000898 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700899 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900
901 /*
902 * Push down and install the IPIP header.
903 */
904
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700905 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 iph->version = 4;
907 iph->ihl = sizeof(struct iphdr) >> 2;
908 iph->frag_off = df;
909 iph->protocol = IPPROTO_GRE;
910 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700911 iph->daddr = fl4.daddr;
912 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913
914 if ((iph->ttl = tiph->ttl) == 0) {
915 if (skb->protocol == htons(ETH_P_IP))
916 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000917#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000919 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920#endif
921 else
David S. Miller323e1262010-12-12 21:55:08 -0800922 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 }
924
Herbert Xue1a80002008-10-09 12:00:17 -0700925 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
926 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
927 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928
929 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000930 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931
932 if (tunnel->parms.o_flags&GRE_SEQ) {
933 ++tunnel->o_seqno;
934 *ptr = htonl(tunnel->o_seqno);
935 ptr--;
936 }
937 if (tunnel->parms.o_flags&GRE_KEY) {
938 *ptr = tunnel->parms.o_key;
939 ptr--;
940 }
941 if (tunnel->parms.o_flags&GRE_CSUM) {
942 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000943 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 }
945 }
946
947 nf_reset(skb);
Eric Dumazete985aad2010-09-27 03:57:11 +0000948 tstats = this_cpu_ptr(dev->tstats);
949 __IPTUNNEL_XMIT(tstats, &dev->stats);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000950 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
David S. Miller496053f2012-01-11 16:46:32 -0800952#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953tx_error_icmp:
954 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800955#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000957 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000959 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960}
961
Herbert Xu42aa9162008-10-09 11:59:32 -0700962static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800963{
964 struct net_device *tdev = NULL;
965 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000966 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800967 int hlen = LL_MAX_HEADER;
968 int mtu = ETH_DATA_LEN;
969 int addend = sizeof(struct iphdr) + 4;
970
971 tunnel = netdev_priv(dev);
972 iph = &tunnel->parms.iph;
973
Herbert Xuc95b8192008-10-09 11:58:54 -0700974 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800975
976 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -0700977 struct flowi4 fl4;
978 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +0000979
David S. Millercbb1e852011-05-04 12:33:34 -0700980 rt = ip_route_output_gre(dev_net(dev), &fl4,
981 iph->daddr, iph->saddr,
982 tunnel->parms.o_key,
983 RT_TOS(iph->tos),
984 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800985 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700986 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800987 ip_rt_put(rt);
988 }
Herbert Xue1a80002008-10-09 12:00:17 -0700989
990 if (dev->type != ARPHRD_ETHER)
991 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800992 }
993
994 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700995 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800996
997 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700998 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800999 mtu = tdev->mtu;
1000 }
1001 dev->iflink = tunnel->parms.link;
1002
1003 /* Precalculate GRE options length */
1004 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1005 if (tunnel->parms.o_flags&GRE_CSUM)
1006 addend += 4;
1007 if (tunnel->parms.o_flags&GRE_KEY)
1008 addend += 4;
1009 if (tunnel->parms.o_flags&GRE_SEQ)
1010 addend += 4;
1011 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001012 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001013 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001014
1015 if (mtu < 68)
1016 mtu = 68;
1017
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001018 tunnel->hlen = addend;
1019
Herbert Xu42aa9162008-10-09 11:59:32 -07001020 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001021}
1022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023static int
1024ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1025{
1026 int err = 0;
1027 struct ip_tunnel_parm p;
1028 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001029 struct net *net = dev_net(dev);
1030 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
1032 switch (cmd) {
1033 case SIOCGETTUNNEL:
1034 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001035 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1037 err = -EFAULT;
1038 break;
1039 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001040 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
1042 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001043 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 memcpy(&p, &t->parms, sizeof(p));
1045 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1046 err = -EFAULT;
1047 break;
1048
1049 case SIOCADDTUNNEL:
1050 case SIOCCHGTUNNEL:
1051 err = -EPERM;
1052 if (!capable(CAP_NET_ADMIN))
1053 goto done;
1054
1055 err = -EFAULT;
1056 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1057 goto done;
1058
1059 err = -EINVAL;
1060 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1061 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1062 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1063 goto done;
1064 if (p.iph.ttl)
1065 p.iph.frag_off |= htons(IP_DF);
1066
1067 if (!(p.i_flags&GRE_KEY))
1068 p.i_key = 0;
1069 if (!(p.o_flags&GRE_KEY))
1070 p.o_key = 0;
1071
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001072 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001074 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 if (t != NULL) {
1076 if (t->dev != dev) {
1077 err = -EEXIST;
1078 break;
1079 }
1080 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001081 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
Patrick McHardy2941a482006-01-08 22:05:26 -08001083 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
Joe Perchesf97c1e02007-12-16 13:45:43 -08001085 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086 nflags = IFF_BROADCAST;
1087 else if (p.iph.daddr)
1088 nflags = IFF_POINTOPOINT;
1089
1090 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1091 err = -EINVAL;
1092 break;
1093 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001094 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001095 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 t->parms.iph.saddr = p.iph.saddr;
1097 t->parms.iph.daddr = p.iph.daddr;
1098 t->parms.i_key = p.i_key;
1099 t->parms.o_key = p.o_key;
1100 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1101 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001102 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 netdev_state_change(dev);
1104 }
1105 }
1106
1107 if (t) {
1108 err = 0;
1109 if (cmd == SIOCCHGTUNNEL) {
1110 t->parms.iph.ttl = p.iph.ttl;
1111 t->parms.iph.tos = p.iph.tos;
1112 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001113 if (t->parms.link != p.link) {
1114 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001115 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001116 netdev_state_change(dev);
1117 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 }
1119 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1120 err = -EFAULT;
1121 } else
1122 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1123 break;
1124
1125 case SIOCDELTUNNEL:
1126 err = -EPERM;
1127 if (!capable(CAP_NET_ADMIN))
1128 goto done;
1129
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001130 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 err = -EFAULT;
1132 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1133 goto done;
1134 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001135 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 goto done;
1137 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001138 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 goto done;
1140 dev = t->dev;
1141 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001142 unregister_netdevice(dev);
1143 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 break;
1145
1146 default:
1147 err = -EINVAL;
1148 }
1149
1150done:
1151 return err;
1152}
1153
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1155{
Patrick McHardy2941a482006-01-08 22:05:26 -08001156 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001157 if (new_mtu < 68 ||
1158 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 return -EINVAL;
1160 dev->mtu = new_mtu;
1161 return 0;
1162}
1163
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1192
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001193static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1194 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001195 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196{
Patrick McHardy2941a482006-01-08 22:05:26 -08001197 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001199 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
1201 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1202 p[0] = t->parms.o_flags;
1203 p[1] = htons(type);
1204
1205 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001206 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001208
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 if (saddr)
1210 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001211 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001213 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001215
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 return -t->hlen;
1217}
1218
Timo Teras6a5f44d2007-10-23 20:31:53 -07001219static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1220{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001221 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001222 memcpy(haddr, &iph->saddr, 4);
1223 return 4;
1224}
1225
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001226static const struct header_ops ipgre_header_ops = {
1227 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001228 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001229};
1230
Timo Teras6a5f44d2007-10-23 20:31:53 -07001231#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232static int ipgre_open(struct net_device *dev)
1233{
Patrick McHardy2941a482006-01-08 22:05:26 -08001234 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
Joe Perchesf97c1e02007-12-16 13:45:43 -08001236 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001237 struct flowi4 fl4;
1238 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001239
David S. Millercbb1e852011-05-04 12:33:34 -07001240 rt = ip_route_output_gre(dev_net(dev), &fl4,
1241 t->parms.iph.daddr,
1242 t->parms.iph.saddr,
1243 t->parms.o_key,
1244 RT_TOS(t->parms.iph.tos),
1245 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001246 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001248 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001250 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 return -EADDRNOTAVAIL;
1252 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001253 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 }
1255 return 0;
1256}
1257
1258static int ipgre_close(struct net_device *dev)
1259{
Patrick McHardy2941a482006-01-08 22:05:26 -08001260 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001261
Joe Perchesf97c1e02007-12-16 13:45:43 -08001262 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001263 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001264 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001265 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 }
1268 return 0;
1269}
1270
1271#endif
1272
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001273static const struct net_device_ops ipgre_netdev_ops = {
1274 .ndo_init = ipgre_tunnel_init,
1275 .ndo_uninit = ipgre_tunnel_uninit,
1276#ifdef CONFIG_NET_IPGRE_BROADCAST
1277 .ndo_open = ipgre_open,
1278 .ndo_stop = ipgre_close,
1279#endif
1280 .ndo_start_xmit = ipgre_tunnel_xmit,
1281 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1282 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001283 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001284};
1285
Eric Dumazete985aad2010-09-27 03:57:11 +00001286static void ipgre_dev_free(struct net_device *dev)
1287{
1288 free_percpu(dev->tstats);
1289 free_netdev(dev);
1290}
1291
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292static void ipgre_tunnel_setup(struct net_device *dev)
1293{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001294 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001295 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296
1297 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001298 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001299 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 dev->flags = IFF_NOARP;
1301 dev->iflink = 0;
1302 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001303 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001304 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305}
1306
1307static int ipgre_tunnel_init(struct net_device *dev)
1308{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 struct ip_tunnel *tunnel;
1310 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311
Patrick McHardy2941a482006-01-08 22:05:26 -08001312 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 iph = &tunnel->parms.iph;
1314
1315 tunnel->dev = dev;
1316 strcpy(tunnel->parms.name, dev->name);
1317
1318 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1319 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1320
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001323 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 if (!iph->saddr)
1325 return -EINVAL;
1326 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001327 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 }
1329#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001330 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001331 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
Eric Dumazete985aad2010-09-27 03:57:11 +00001333 dev->tstats = alloc_percpu(struct pcpu_tstats);
1334 if (!dev->tstats)
1335 return -ENOMEM;
1336
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 return 0;
1338}
1339
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001340static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341{
Patrick McHardy2941a482006-01-08 22:05:26 -08001342 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 struct iphdr *iph = &tunnel->parms.iph;
1344
1345 tunnel->dev = dev;
1346 strcpy(tunnel->parms.name, dev->name);
1347
1348 iph->version = 4;
1349 iph->protocol = IPPROTO_GRE;
1350 iph->ihl = 5;
1351 tunnel->hlen = sizeof(struct iphdr) + 4;
1352
1353 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354}
1355
1356
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001357static const struct gre_protocol ipgre_protocol = {
1358 .handler = ipgre_rcv,
1359 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360};
1361
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001362static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001363{
1364 int prio;
1365
1366 for (prio = 0; prio < 4; prio++) {
1367 int h;
1368 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001369 struct ip_tunnel *t;
1370
1371 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001372
1373 while (t != NULL) {
1374 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001375 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001376 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001377 }
1378 }
1379}
1380
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001381static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001382{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001383 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001384 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001385
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001386 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1387 ipgre_tunnel_setup);
1388 if (!ign->fb_tunnel_dev) {
1389 err = -ENOMEM;
1390 goto err_alloc_dev;
1391 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001392 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001393
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001394 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001395 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001396
1397 if ((err = register_netdev(ign->fb_tunnel_dev)))
1398 goto err_reg_dev;
1399
Eric Dumazet3285ee32010-10-30 16:21:28 -07001400 rcu_assign_pointer(ign->tunnels_wc[0],
1401 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001402 return 0;
1403
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001404err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001405 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001406err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001407 return err;
1408}
1409
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001410static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001411{
1412 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001413 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001414
1415 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001416 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001417 ipgre_destroy_tunnels(ign, &list);
1418 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001419 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001420}
1421
1422static struct pernet_operations ipgre_net_ops = {
1423 .init = ipgre_init_net,
1424 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001425 .id = &ipgre_net_id,
1426 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001427};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
Herbert Xuc19e6542008-10-09 11:59:55 -07001429static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1430{
1431 __be16 flags;
1432
1433 if (!data)
1434 return 0;
1435
1436 flags = 0;
1437 if (data[IFLA_GRE_IFLAGS])
1438 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1439 if (data[IFLA_GRE_OFLAGS])
1440 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1441 if (flags & (GRE_VERSION|GRE_ROUTING))
1442 return -EINVAL;
1443
1444 return 0;
1445}
1446
Herbert Xue1a80002008-10-09 12:00:17 -07001447static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1448{
1449 __be32 daddr;
1450
1451 if (tb[IFLA_ADDRESS]) {
1452 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1453 return -EINVAL;
1454 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1455 return -EADDRNOTAVAIL;
1456 }
1457
1458 if (!data)
1459 goto out;
1460
1461 if (data[IFLA_GRE_REMOTE]) {
1462 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1463 if (!daddr)
1464 return -EINVAL;
1465 }
1466
1467out:
1468 return ipgre_tunnel_validate(tb, data);
1469}
1470
Herbert Xuc19e6542008-10-09 11:59:55 -07001471static void ipgre_netlink_parms(struct nlattr *data[],
1472 struct ip_tunnel_parm *parms)
1473{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001474 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001475
1476 parms->iph.protocol = IPPROTO_GRE;
1477
1478 if (!data)
1479 return;
1480
1481 if (data[IFLA_GRE_LINK])
1482 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1483
1484 if (data[IFLA_GRE_IFLAGS])
1485 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1486
1487 if (data[IFLA_GRE_OFLAGS])
1488 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1489
1490 if (data[IFLA_GRE_IKEY])
1491 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1492
1493 if (data[IFLA_GRE_OKEY])
1494 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1495
1496 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001497 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001498
1499 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001500 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001501
1502 if (data[IFLA_GRE_TTL])
1503 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1504
1505 if (data[IFLA_GRE_TOS])
1506 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1507
1508 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1509 parms->iph.frag_off = htons(IP_DF);
1510}
1511
Herbert Xue1a80002008-10-09 12:00:17 -07001512static int ipgre_tap_init(struct net_device *dev)
1513{
1514 struct ip_tunnel *tunnel;
1515
1516 tunnel = netdev_priv(dev);
1517
1518 tunnel->dev = dev;
1519 strcpy(tunnel->parms.name, dev->name);
1520
1521 ipgre_tunnel_bind_dev(dev);
1522
Eric Dumazete985aad2010-09-27 03:57:11 +00001523 dev->tstats = alloc_percpu(struct pcpu_tstats);
1524 if (!dev->tstats)
1525 return -ENOMEM;
1526
Herbert Xue1a80002008-10-09 12:00:17 -07001527 return 0;
1528}
1529
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001530static const struct net_device_ops ipgre_tap_netdev_ops = {
1531 .ndo_init = ipgre_tap_init,
1532 .ndo_uninit = ipgre_tunnel_uninit,
1533 .ndo_start_xmit = ipgre_tunnel_xmit,
1534 .ndo_set_mac_address = eth_mac_addr,
1535 .ndo_validate_addr = eth_validate_addr,
1536 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001537 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001538};
1539
Herbert Xue1a80002008-10-09 12:00:17 -07001540static void ipgre_tap_setup(struct net_device *dev)
1541{
1542
1543 ether_setup(dev);
1544
Herbert Xu2e9526b2009-10-30 05:51:48 +00001545 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001546 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001547
1548 dev->iflink = 0;
1549 dev->features |= NETIF_F_NETNS_LOCAL;
1550}
1551
Eric W. Biederman81adee42009-11-08 00:53:51 -08001552static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001553 struct nlattr *data[])
1554{
1555 struct ip_tunnel *nt;
1556 struct net *net = dev_net(dev);
1557 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1558 int mtu;
1559 int err;
1560
1561 nt = netdev_priv(dev);
1562 ipgre_netlink_parms(data, &nt->parms);
1563
Herbert Xue1a80002008-10-09 12:00:17 -07001564 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001565 return -EEXIST;
1566
Herbert Xue1a80002008-10-09 12:00:17 -07001567 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001568 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001569
Herbert Xuc19e6542008-10-09 11:59:55 -07001570 mtu = ipgre_tunnel_bind_dev(dev);
1571 if (!tb[IFLA_MTU])
1572 dev->mtu = mtu;
1573
Eric Dumazetb790e012010-09-27 23:05:47 +00001574 /* Can use a lockless transmit, unless we generate output sequences */
1575 if (!(nt->parms.o_flags & GRE_SEQ))
1576 dev->features |= NETIF_F_LLTX;
1577
Herbert Xuc19e6542008-10-09 11:59:55 -07001578 err = register_netdevice(dev);
1579 if (err)
1580 goto out;
1581
1582 dev_hold(dev);
1583 ipgre_tunnel_link(ign, nt);
1584
1585out:
1586 return err;
1587}
1588
1589static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1590 struct nlattr *data[])
1591{
1592 struct ip_tunnel *t, *nt;
1593 struct net *net = dev_net(dev);
1594 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1595 struct ip_tunnel_parm p;
1596 int mtu;
1597
1598 if (dev == ign->fb_tunnel_dev)
1599 return -EINVAL;
1600
1601 nt = netdev_priv(dev);
1602 ipgre_netlink_parms(data, &p);
1603
1604 t = ipgre_tunnel_locate(net, &p, 0);
1605
1606 if (t) {
1607 if (t->dev != dev)
1608 return -EEXIST;
1609 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001610 t = nt;
1611
Herbert Xu2e9526b2009-10-30 05:51:48 +00001612 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001613 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001614
Herbert Xu2e9526b2009-10-30 05:51:48 +00001615 if (ipv4_is_multicast(p.iph.daddr))
1616 nflags = IFF_BROADCAST;
1617 else if (p.iph.daddr)
1618 nflags = IFF_POINTOPOINT;
1619
1620 if ((dev->flags ^ nflags) &
1621 (IFF_POINTOPOINT | IFF_BROADCAST))
1622 return -EINVAL;
1623 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001624
1625 ipgre_tunnel_unlink(ign, t);
1626 t->parms.iph.saddr = p.iph.saddr;
1627 t->parms.iph.daddr = p.iph.daddr;
1628 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001629 if (dev->type != ARPHRD_ETHER) {
1630 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1631 memcpy(dev->broadcast, &p.iph.daddr, 4);
1632 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001633 ipgre_tunnel_link(ign, t);
1634 netdev_state_change(dev);
1635 }
1636
1637 t->parms.o_key = p.o_key;
1638 t->parms.iph.ttl = p.iph.ttl;
1639 t->parms.iph.tos = p.iph.tos;
1640 t->parms.iph.frag_off = p.iph.frag_off;
1641
1642 if (t->parms.link != p.link) {
1643 t->parms.link = p.link;
1644 mtu = ipgre_tunnel_bind_dev(dev);
1645 if (!tb[IFLA_MTU])
1646 dev->mtu = mtu;
1647 netdev_state_change(dev);
1648 }
1649
1650 return 0;
1651}
1652
1653static size_t ipgre_get_size(const struct net_device *dev)
1654{
1655 return
1656 /* IFLA_GRE_LINK */
1657 nla_total_size(4) +
1658 /* IFLA_GRE_IFLAGS */
1659 nla_total_size(2) +
1660 /* IFLA_GRE_OFLAGS */
1661 nla_total_size(2) +
1662 /* IFLA_GRE_IKEY */
1663 nla_total_size(4) +
1664 /* IFLA_GRE_OKEY */
1665 nla_total_size(4) +
1666 /* IFLA_GRE_LOCAL */
1667 nla_total_size(4) +
1668 /* IFLA_GRE_REMOTE */
1669 nla_total_size(4) +
1670 /* IFLA_GRE_TTL */
1671 nla_total_size(1) +
1672 /* IFLA_GRE_TOS */
1673 nla_total_size(1) +
1674 /* IFLA_GRE_PMTUDISC */
1675 nla_total_size(1) +
1676 0;
1677}
1678
1679static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1680{
1681 struct ip_tunnel *t = netdev_priv(dev);
1682 struct ip_tunnel_parm *p = &t->parms;
1683
David S. Millerf3756b72012-04-01 20:39:02 -04001684 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1685 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1686 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1687 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1688 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1689 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1690 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1691 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1692 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1693 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1694 !!(p->iph.frag_off & htons(IP_DF))))
1695 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001696 return 0;
1697
1698nla_put_failure:
1699 return -EMSGSIZE;
1700}
1701
1702static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1703 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1704 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1705 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1706 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1707 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001708 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1709 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001710 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1711 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1712 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1713};
1714
1715static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1716 .kind = "gre",
1717 .maxtype = IFLA_GRE_MAX,
1718 .policy = ipgre_policy,
1719 .priv_size = sizeof(struct ip_tunnel),
1720 .setup = ipgre_tunnel_setup,
1721 .validate = ipgre_tunnel_validate,
1722 .newlink = ipgre_newlink,
1723 .changelink = ipgre_changelink,
1724 .get_size = ipgre_get_size,
1725 .fill_info = ipgre_fill_info,
1726};
1727
Herbert Xue1a80002008-10-09 12:00:17 -07001728static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1729 .kind = "gretap",
1730 .maxtype = IFLA_GRE_MAX,
1731 .policy = ipgre_policy,
1732 .priv_size = sizeof(struct ip_tunnel),
1733 .setup = ipgre_tap_setup,
1734 .validate = ipgre_tap_validate,
1735 .newlink = ipgre_newlink,
1736 .changelink = ipgre_changelink,
1737 .get_size = ipgre_get_size,
1738 .fill_info = ipgre_fill_info,
1739};
1740
/*
 *	And now the module code and kernel interface.
 */
1744
1745static int __init ipgre_init(void)
1746{
1747 int err;
1748
Joe Perches058bd4d2012-03-11 18:36:11 +00001749 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001751 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001752 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001753 return err;
1754
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001755 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001756 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001757 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001758 goto add_proto_failed;
1759 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001760
Herbert Xuc19e6542008-10-09 11:59:55 -07001761 err = rtnl_link_register(&ipgre_link_ops);
1762 if (err < 0)
1763 goto rtnl_link_failed;
1764
Herbert Xue1a80002008-10-09 12:00:17 -07001765 err = rtnl_link_register(&ipgre_tap_ops);
1766 if (err < 0)
1767 goto tap_ops_failed;
1768
Herbert Xuc19e6542008-10-09 11:59:55 -07001769out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001771
Herbert Xue1a80002008-10-09 12:00:17 -07001772tap_ops_failed:
1773 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001774rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001775 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001776add_proto_failed:
1777 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001778 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779}
1780
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001781static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782{
Herbert Xue1a80002008-10-09 12:00:17 -07001783 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001784 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001785 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001786 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001787 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788}
1789
1790module_init(ipgre_init);
1791module_exit(ipgre_fini);
1792MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001793MODULE_ALIAS_RTNL_LINK("gre");
1794MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9a2011-03-02 00:33:13 +03001795MODULE_ALIAS_NETDEV("gre0");