blob: f36ce156cac6561da03d032e4f445f772d40b375 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.
68
Eric Dumazeta43912a2009-09-23 10:28:33 +000069 Current solution: HARD_TX_LOCK lock breaks dead loops.
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
Herbert Xuc19e6542008-10-09 11:59:55 -0700119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700122static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700126#define HASH_SIZE 16
127
Eric Dumazetf99189b2009-11-17 10:42:49 +0000128static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700129struct ipgre_net {
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700132 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133};
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
   We require exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
151 */
152
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15) by
 * XOR-ing the value with itself shifted right by four bits.
 *
 * The argument is fully parenthesized so that an expression argument
 * (e.g. "a | b") is cast and shifted as a whole.  Note that @addr is
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154
/*
 * Named views of the four tables in struct ipgre_net, indexed by which
 * endpoints a tunnel has configured.
 */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
/*
 * Locking: the hash tables are protected by RCU and by this spinlock.
 * Insertion and removal of list entries take the spinlock; lookups walk
 * the lists with rcu_dereference().
 */
static DEFINE_SPINLOCK(ipgre_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
/*
 * Walk one hash chain beginning at @start.  The cursor is the variable
 * named 't', which the calling function must declare itself.
 */
#define for_each_ip_tunnel_rcu(start)		\
	for (t = rcu_dereference(start);	\
	     t;					\
	     t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
Timo Teras749c10f2009-01-19 17:22:12 -0800169static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172{
Timo Teras749c10f2009-01-19 17:22:12 -0800173 struct net *net = dev_net(dev);
174 int link = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 unsigned h0 = HASH(remote);
176 unsigned h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800177 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700178 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700179 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800181 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800184 if (local != t->parms.iph.saddr ||
185 remote != t->parms.iph.daddr ||
186 key != t->parms.i_key ||
187 !(t->dev->flags & IFF_UP))
188 continue;
189
190 if (t->dev->type != ARPHRD_IPGRE &&
191 t->dev->type != dev_type)
192 continue;
193
Timo Terasafcf1242009-01-26 20:56:10 -0800194 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800195 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800196 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800197 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800198 score |= 2;
199 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800200 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800201
202 if (score < cand_score) {
203 cand = t;
204 cand_score = score;
205 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 }
Herbert Xue1a80002008-10-09 12:00:17 -0700207
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800209 if (remote != t->parms.iph.daddr ||
210 key != t->parms.i_key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
213
214 if (t->dev->type != ARPHRD_IPGRE &&
215 t->dev->type != dev_type)
216 continue;
217
Timo Terasafcf1242009-01-26 20:56:10 -0800218 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800219 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800220 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800221 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800222 score |= 2;
223 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800224 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800225
226 if (score < cand_score) {
227 cand = t;
228 cand_score = score;
229 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 }
Herbert Xue1a80002008-10-09 12:00:17 -0700231
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
239
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
243
Timo Terasafcf1242009-01-26 20:56:10 -0800244 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800245 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800246 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800247 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800248 score |= 2;
249 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800250 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800251
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
255 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 }
Herbert Xue1a80002008-10-09 12:00:17 -0700257
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800259 if (t->parms.i_key != key ||
260 !(t->dev->flags & IFF_UP))
261 continue;
262
263 if (t->dev->type != ARPHRD_IPGRE &&
264 t->dev->type != dev_type)
265 continue;
266
Timo Terasafcf1242009-01-26 20:56:10 -0800267 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800268 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800269 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800270 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800271 score |= 2;
272 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800273 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800274
275 if (score < cand_score) {
276 cand = t;
277 cand_score = score;
278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 }
280
Timo Terasafcf1242009-01-26 20:56:10 -0800281 if (cand != NULL)
282 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700283
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000284 dev = ign->fb_tunnel_dev;
285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800287
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 return NULL;
289}
290
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700291static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900294 __be32 remote = parms->iph.daddr;
295 __be32 local = parms->iph.saddr;
296 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 unsigned h = HASH(key);
298 int prio = 0;
299
300 if (local)
301 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800302 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 prio |= 2;
304 h ^= HASH(remote);
305 }
306
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700307 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308}
309
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700310static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900312{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700313 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900314}
315
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700316static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000320 spin_lock_bh(&ipgre_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 t->next = *tp;
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000322 rcu_assign_pointer(*tp, t);
323 spin_unlock_bh(&ipgre_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324}
325
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327{
328 struct ip_tunnel **tp;
329
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 if (t == *tp) {
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000332 spin_lock_bh(&ipgre_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 *tp = t->next;
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000334 spin_unlock_bh(&ipgre_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 break;
336 }
337 }
338}
339
Herbert Xue1a80002008-10-09 12:00:17 -0700340static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343{
Al Virod5a0a1e2006-11-08 00:23:14 -0800344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800347 int link = parms->link;
Herbert Xue1a80002008-10-09 12:00:17 -0700348 struct ip_tunnel *t, **tp;
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352 if (local == t->parms.iph.saddr &&
353 remote == t->parms.iph.daddr &&
354 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800355 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700356 type == t->dev->type)
357 break;
358
359 return t;
360}
361
362static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363 struct ip_tunnel_parm *parms, int create)
364{
365 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700368 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
Herbert Xue1a80002008-10-09 12:00:17 -0700370 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371 if (t || !create)
372 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
374 if (parms->name[0])
375 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800376 else
377 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
379 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
380 if (!dev)
381 return NULL;
382
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700383 dev_net_set(dev, net);
384
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800385 if (strchr(name, '%')) {
386 if (dev_alloc_name(dev, name) < 0)
387 goto failed_free;
388 }
389
Patrick McHardy2941a482006-01-08 22:05:26 -0800390 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700392 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
Herbert Xu42aa9162008-10-09 11:59:32 -0700394 dev->mtu = ipgre_tunnel_bind_dev(dev);
395
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800396 if (register_netdevice(dev) < 0)
397 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700400 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 return nt;
402
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800403failed_free:
404 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 return NULL;
406}
407
408static void ipgre_tunnel_uninit(struct net_device *dev)
409{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700410 struct net *net = dev_net(dev);
411 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
412
413 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 dev_put(dev);
415}
416
417
418static void ipgre_err(struct sk_buff *skb, u32 info)
419{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
Rami Rosen071f92d2008-05-21 17:47:54 -0700421/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 8 bytes of packet payload. It means, that precise relaying of
423 ICMP in the real Internet is absolutely infeasible.
424
425 Moreover, Cisco "wise men" put GRE key to the third word
426 in GRE header. It makes impossible maintaining even soft state for keyed
427 GRE tunnels with enabled checksum. Tell them "thank you".
428
429 Well, I wonder, rfc1812 was written by Cisco employee,
430 what the hell these idiots break standrads established
431 by themself???
432 */
433
Jianjun Kong6ed25332008-11-03 00:25:16 -0800434 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800435 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300437 const int type = icmp_hdr(skb)->type;
438 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800440 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
442 flags = p[0];
443 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
444 if (flags&(GRE_VERSION|GRE_ROUTING))
445 return;
446 if (flags&GRE_KEY) {
447 grehlen += 4;
448 if (flags&GRE_CSUM)
449 grehlen += 4;
450 }
451 }
452
453 /* If only 8 bytes returned, keyed message will be dropped here */
454 if (skb_headlen(skb) < grehlen)
455 return;
456
457 switch (type) {
458 default:
459 case ICMP_PARAMETERPROB:
460 return;
461
462 case ICMP_DEST_UNREACH:
463 switch (code) {
464 case ICMP_SR_FAILED:
465 case ICMP_PORT_UNREACH:
466 /* Impossible event. */
467 return;
468 case ICMP_FRAG_NEEDED:
469 /* Soft state for pmtu is maintained by IP core. */
470 return;
471 default:
472 /* All others are translated to HOST_UNREACH.
473 rfc2003 contains "deep thoughts" about NET_UNREACH,
474 I believe they are just ether pollution. --ANK
475 */
476 break;
477 }
478 break;
479 case ICMP_TIME_EXCEEDED:
480 if (code != ICMP_EXC_TTL)
481 return;
482 break;
483 }
484
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000485 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700487 flags & GRE_KEY ?
488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800490 if (t == NULL || t->parms.iph.daddr == 0 ||
491 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 goto out;
493
494 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
495 goto out;
496
Wei Yongjunda6185d82009-02-24 23:34:48 -0800497 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 t->err_count++;
499 else
500 t->err_count = 1;
501 t->err_time = jiffies;
502out:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000503 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505}
506
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
508{
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700511 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700513 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 }
515 }
516}
517
518static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
520{
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
527}
528
529static int ipgre_rcv(struct sk_buff *skb)
530{
531 struct iphdr *iph;
532 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800533 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800534 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800535 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
538 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700539 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700540 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
544
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700545 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800547 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
552 */
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
555
556 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800557 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700558 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800559 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700566 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 }
568 offset += 4;
569 }
570 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800571 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 offset += 4;
573 }
574 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800575 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 offset += 4;
577 }
578 }
579
Herbert Xue1a80002008-10-09 12:00:17 -0700580 gre_proto = *(__be16 *)(h + 2);
581
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000582 rcu_read_lock();
Timo Teras749c10f2009-01-19 17:22:12 -0800583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700586 struct net_device_stats *stats = &tunnel->dev->stats;
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 secpath_reset(skb);
589
Herbert Xue1a80002008-10-09 12:00:17 -0700590 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
594 */
Herbert Xue1a80002008-10-09 12:00:17 -0700595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700596 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900597 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 offset += 4;
599 }
600
Timo Teras1d069162007-12-20 00:10:33 -0800601 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300602 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 skb->pkt_type = PACKET_HOST;
605#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800606 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 /* Looped back packet, drop it! */
Eric Dumazet511c3f92009-06-02 05:14:27 +0000608 if (skb_rtable(skb)->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700610 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 skb->pkt_type = PACKET_BROADCAST;
612 }
613#endif
614
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700617 stats->rx_crc_errors++;
618 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 goto drop;
620 }
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 goto drop;
627 }
628 tunnel->i_seqno = seqno + 1;
629 }
Herbert Xue1a80002008-10-09 12:00:17 -0700630
Herbert Xu64194c32008-10-09 12:03:17 -0700631 len = skb->len;
632
Herbert Xue1a80002008-10-09 12:00:17 -0700633 /* Warning: All skb pointers will be invalidated! */
634 if (tunnel->dev->type == ARPHRD_ETHER) {
635 if (!pskb_may_pull(skb, ETH_HLEN)) {
636 stats->rx_length_errors++;
637 stats->rx_errors++;
638 goto drop;
639 }
640
641 iph = ip_hdr(skb);
642 skb->protocol = eth_type_trans(skb, tunnel->dev);
643 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
644 }
645
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700646 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700647 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 skb->dev = tunnel->dev;
Eric Dumazetadf30902009-06-02 05:19:30 +0000649 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700651
652 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700654
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 netif_rx(skb);
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000656 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 return(0);
658 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661drop:
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000662 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663drop_nolock:
664 kfree_skb(skb);
665 return(0);
666}
667
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669{
Patrick McHardy2941a482006-01-08 22:05:26 -0800670 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazet0bfbedb2009-10-05 00:11:22 -0700671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700673 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 struct iphdr *tiph;
675 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800676 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 struct rtable *rt; /* Route to the other host */
678 struct net_device *tdev; /* Device to other host */
679 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700680 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800682 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 int mtu;
684
Herbert Xue1a80002008-10-09 12:00:17 -0700685 if (dev->type == ARPHRD_ETHER)
686 IPCB(skb)->flags = 0;
687
688 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 gre_hlen = 0;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800690 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 } else {
692 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph;
694 }
695
696 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */
698
Eric Dumazetadf30902009-06-02 05:19:30 +0000699 if (skb_dst(skb) == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700700 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 goto tx_error;
702 }
703
704 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000705 rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp;
708 }
709#ifdef CONFIG_IPV6
710 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6;
712 int addr_type;
Eric Dumazetadf30902009-06-02 05:19:30 +0000713 struct neighbour *neigh = skb_dst(skb)->neighbour;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714
715 if (neigh == NULL)
716 goto tx_error;
717
Jianjun Kong6ed25332008-11-03 00:25:16 -0800718 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 addr_type = ipv6_addr_type(addr6);
720
721 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700722 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 addr_type = ipv6_addr_type(addr6);
724 }
725
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp;
728
729 dst = addr6->s6_addr32[3];
730 }
731#endif
732 else
733 goto tx_error;
734 }
735
736 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700737 if (tos == 1) {
738 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 if (skb->protocol == htons(ETH_P_IP))
740 tos = old_iph->tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 }
742
743 {
744 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u =
746 { .daddr = dst,
747 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700750 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700751 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 goto tx_error;
753 }
754 }
755 tdev = rt->u.dst.dev;
756
757 if (tdev == dev) {
758 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700759 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 goto tx_error;
761 }
762
763 df = tiph->frag_off;
764 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700765 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000767 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
Eric Dumazetadf30902009-06-02 05:19:30 +0000769 if (skb_dst(skb))
770 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
772 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF));
774
775 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt);
779 goto tx_error;
780 }
781 }
782#ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000784 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785
Eric Dumazetadf30902009-06-02 05:19:30 +0000786 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800787 if ((tunnel->parms.iph.daddr &&
788 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 rt6->rt6i_dst.plen == 128) {
790 rt6->rt6i_flags |= RTF_MODIFIED;
Eric Dumazetadf30902009-06-02 05:19:30 +0000791 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 }
793 }
794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
797 ip_rt_put(rt);
798 goto tx_error;
799 }
800 }
801#endif
802
803 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800804 if (time_before(jiffies,
805 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 tunnel->err_count--;
807
808 dst_link_failure(skb);
809 } else
810 tunnel->err_count = 0;
811 }
812
813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
814
Patrick McHardycfbba492007-07-09 15:33:40 -0700815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
818 if (!new_skb) {
819 ip_rt_put(rt);
Eric Dumazet0bfbedb2009-10-05 00:11:22 -0700820 txq->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000822 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 }
824 if (skb->sk)
825 skb_set_owner_w(new_skb, skb->sk);
826 dev_kfree_skb(skb);
827 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700828 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 }
830
Herbert Xu64194c32008-10-09 12:03:17 -0700831 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700832 skb_push(skb, gre_hlen);
833 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800835 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
836 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000837 skb_dst_drop(skb);
838 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839
840 /*
841 * Push down and install the IPIP header.
842 */
843
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700844 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 iph->version = 4;
846 iph->ihl = sizeof(struct iphdr) >> 2;
847 iph->frag_off = df;
848 iph->protocol = IPPROTO_GRE;
849 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
850 iph->daddr = rt->rt_dst;
851 iph->saddr = rt->rt_src;
852
853 if ((iph->ttl = tiph->ttl) == 0) {
854 if (skb->protocol == htons(ETH_P_IP))
855 iph->ttl = old_iph->ttl;
856#ifdef CONFIG_IPV6
857 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed25332008-11-03 00:25:16 -0800858 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859#endif
860 else
861 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
862 }
863
Herbert Xue1a80002008-10-09 12:00:17 -0700864 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
865 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
866 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867
868 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800869 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870
871 if (tunnel->parms.o_flags&GRE_SEQ) {
872 ++tunnel->o_seqno;
873 *ptr = htonl(tunnel->o_seqno);
874 ptr--;
875 }
876 if (tunnel->parms.o_flags&GRE_KEY) {
877 *ptr = tunnel->parms.o_key;
878 ptr--;
879 }
880 if (tunnel->parms.o_flags&GRE_CSUM) {
881 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800882 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 }
884 }
885
886 nf_reset(skb);
887
888 IPTUNNEL_XMIT();
Patrick McHardy6ed10652009-06-23 06:03:08 +0000889 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890
891tx_error_icmp:
892 dst_link_failure(skb);
893
894tx_error:
895 stats->tx_errors++;
896 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000897 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898}
899
Herbert Xu42aa9162008-10-09 11:59:32 -0700900static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800901{
902 struct net_device *tdev = NULL;
903 struct ip_tunnel *tunnel;
904 struct iphdr *iph;
905 int hlen = LL_MAX_HEADER;
906 int mtu = ETH_DATA_LEN;
907 int addend = sizeof(struct iphdr) + 4;
908
909 tunnel = netdev_priv(dev);
910 iph = &tunnel->parms.iph;
911
Herbert Xuc95b8192008-10-09 11:58:54 -0700912 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800913
914 if (iph->daddr) {
915 struct flowi fl = { .oif = tunnel->parms.link,
916 .nl_u = { .ip4_u =
917 { .daddr = iph->daddr,
918 .saddr = iph->saddr,
919 .tos = RT_TOS(iph->tos) } },
920 .proto = IPPROTO_GRE };
921 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700922 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800923 tdev = rt->u.dst.dev;
924 ip_rt_put(rt);
925 }
Herbert Xue1a80002008-10-09 12:00:17 -0700926
927 if (dev->type != ARPHRD_ETHER)
928 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800929 }
930
931 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700932 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800933
934 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700935 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800936 mtu = tdev->mtu;
937 }
938 dev->iflink = tunnel->parms.link;
939
940 /* Precalculate GRE options length */
941 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942 if (tunnel->parms.o_flags&GRE_CSUM)
943 addend += 4;
944 if (tunnel->parms.o_flags&GRE_KEY)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_SEQ)
947 addend += 4;
948 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700949 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -0700950 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -0700951
952 if (mtu < 68)
953 mtu = 68;
954
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800955 tunnel->hlen = addend;
956
Herbert Xu42aa9162008-10-09 11:59:32 -0700957 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800958}
959
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960static int
961ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
962{
963 int err = 0;
964 struct ip_tunnel_parm p;
965 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700966 struct net *net = dev_net(dev);
967 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968
969 switch (cmd) {
970 case SIOCGETTUNNEL:
971 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700972 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
974 err = -EFAULT;
975 break;
976 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700977 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 }
979 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800980 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 memcpy(&p, &t->parms, sizeof(p));
982 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
983 err = -EFAULT;
984 break;
985
986 case SIOCADDTUNNEL:
987 case SIOCCHGTUNNEL:
988 err = -EPERM;
989 if (!capable(CAP_NET_ADMIN))
990 goto done;
991
992 err = -EFAULT;
993 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
994 goto done;
995
996 err = -EINVAL;
997 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
998 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
999 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1000 goto done;
1001 if (p.iph.ttl)
1002 p.iph.frag_off |= htons(IP_DF);
1003
1004 if (!(p.i_flags&GRE_KEY))
1005 p.i_key = 0;
1006 if (!(p.o_flags&GRE_KEY))
1007 p.o_key = 0;
1008
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001009 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001011 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 if (t != NULL) {
1013 if (t->dev != dev) {
1014 err = -EEXIST;
1015 break;
1016 }
1017 } else {
Jianjun Kong6ed25332008-11-03 00:25:16 -08001018 unsigned nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Patrick McHardy2941a482006-01-08 22:05:26 -08001020 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021
Joe Perchesf97c1e02007-12-16 13:45:43 -08001022 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 nflags = IFF_BROADCAST;
1024 else if (p.iph.daddr)
1025 nflags = IFF_POINTOPOINT;
1026
1027 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1028 err = -EINVAL;
1029 break;
1030 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001031 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 t->parms.iph.saddr = p.iph.saddr;
1033 t->parms.iph.daddr = p.iph.daddr;
1034 t->parms.i_key = p.i_key;
1035 t->parms.o_key = p.o_key;
1036 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1037 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001038 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 netdev_state_change(dev);
1040 }
1041 }
1042
1043 if (t) {
1044 err = 0;
1045 if (cmd == SIOCCHGTUNNEL) {
1046 t->parms.iph.ttl = p.iph.ttl;
1047 t->parms.iph.tos = p.iph.tos;
1048 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001049 if (t->parms.link != p.link) {
1050 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001051 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001052 netdev_state_change(dev);
1053 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 }
1055 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1056 err = -EFAULT;
1057 } else
1058 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1059 break;
1060
1061 case SIOCDELTUNNEL:
1062 err = -EPERM;
1063 if (!capable(CAP_NET_ADMIN))
1064 goto done;
1065
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001066 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 err = -EFAULT;
1068 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1069 goto done;
1070 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001071 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 goto done;
1073 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001074 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 goto done;
1076 dev = t->dev;
1077 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001078 unregister_netdevice(dev);
1079 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 break;
1081
1082 default:
1083 err = -EINVAL;
1084 }
1085
1086done:
1087 return err;
1088}
1089
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091{
Patrick McHardy2941a482006-01-08 22:05:26 -08001092 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001093 if (new_mtu < 68 ||
1094 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 return -EINVAL;
1096 dev->mtu = new_mtu;
1097 return 0;
1098}
1099
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1128
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001129static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1130 unsigned short type,
1131 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132{
Patrick McHardy2941a482006-01-08 22:05:26 -08001133 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001135 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136
1137 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1138 p[0] = t->parms.o_flags;
1139 p[1] = htons(type);
1140
1141 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001142 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001144
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 if (saddr)
1146 memcpy(&iph->saddr, saddr, 4);
1147
1148 if (daddr) {
1149 memcpy(&iph->daddr, daddr, 4);
1150 return t->hlen;
1151 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001152 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001154
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 return -t->hlen;
1156}
1157
Timo Teras6a5f44d2007-10-23 20:31:53 -07001158static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1159{
Jianjun Kong6ed25332008-11-03 00:25:16 -08001160 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001161 memcpy(haddr, &iph->saddr, 4);
1162 return 4;
1163}
1164
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001165static const struct header_ops ipgre_header_ops = {
1166 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001167 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001168};
1169
Timo Teras6a5f44d2007-10-23 20:31:53 -07001170#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171static int ipgre_open(struct net_device *dev)
1172{
Patrick McHardy2941a482006-01-08 22:05:26 -08001173 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174
Joe Perchesf97c1e02007-12-16 13:45:43 -08001175 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176 struct flowi fl = { .oif = t->parms.link,
1177 .nl_u = { .ip4_u =
1178 { .daddr = t->parms.iph.daddr,
1179 .saddr = t->parms.iph.saddr,
1180 .tos = RT_TOS(t->parms.iph.tos) } },
1181 .proto = IPPROTO_GRE };
1182 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001183 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 return -EADDRNOTAVAIL;
1185 dev = rt->u.dst.dev;
1186 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001187 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 return -EADDRNOTAVAIL;
1189 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001190 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 }
1192 return 0;
1193}
1194
1195static int ipgre_close(struct net_device *dev)
1196{
Patrick McHardy2941a482006-01-08 22:05:26 -08001197 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001198
Joe Perchesf97c1e02007-12-16 13:45:43 -08001199 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001200 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001201 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 if (in_dev) {
1203 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1204 in_dev_put(in_dev);
1205 }
1206 }
1207 return 0;
1208}
1209
1210#endif
1211
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001212static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_init = ipgre_tunnel_init,
1214 .ndo_uninit = ipgre_tunnel_uninit,
1215#ifdef CONFIG_NET_IPGRE_BROADCAST
1216 .ndo_open = ipgre_open,
1217 .ndo_stop = ipgre_close,
1218#endif
1219 .ndo_start_xmit = ipgre_tunnel_xmit,
1220 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1221 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1222};
1223
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224static void ipgre_tunnel_setup(struct net_device *dev)
1225{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001226 dev->netdev_ops = &ipgre_netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 dev->destructor = free_netdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
1229 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001230 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001231 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 dev->flags = IFF_NOARP;
1233 dev->iflink = 0;
1234 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001235 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001236 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237}
1238
1239static int ipgre_tunnel_init(struct net_device *dev)
1240{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 struct ip_tunnel *tunnel;
1242 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243
Patrick McHardy2941a482006-01-08 22:05:26 -08001244 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 iph = &tunnel->parms.iph;
1246
1247 tunnel->dev = dev;
1248 strcpy(tunnel->parms.name, dev->name);
1249
1250 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1251 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1252
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001255 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 if (!iph->saddr)
1257 return -EINVAL;
1258 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001259 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 }
1261#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001262 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001263 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 return 0;
1266}
1267
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001268static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269{
Patrick McHardy2941a482006-01-08 22:05:26 -08001270 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001272 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
1274 tunnel->dev = dev;
1275 strcpy(tunnel->parms.name, dev->name);
1276
1277 iph->version = 4;
1278 iph->protocol = IPPROTO_GRE;
1279 iph->ihl = 5;
1280 tunnel->hlen = sizeof(struct iphdr) + 4;
1281
1282 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001283 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284}
1285
1286
Alexey Dobriyan32613092009-09-14 12:21:47 +00001287static const struct net_protocol ipgre_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 .handler = ipgre_rcv,
1289 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001290 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291};
1292
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001293static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001294{
1295 int prio;
1296
1297 for (prio = 0; prio < 4; prio++) {
1298 int h;
1299 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001300 struct ip_tunnel *t = ign->tunnels[prio][h];
1301
1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1305 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001306 }
1307 }
1308}
1309
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001310static int ipgre_init_net(struct net *net)
1311{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001313 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001314
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001315 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1316 ipgre_tunnel_setup);
1317 if (!ign->fb_tunnel_dev) {
1318 err = -ENOMEM;
1319 goto err_alloc_dev;
1320 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001321 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001322
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001323 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001324 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001325
1326 if ((err = register_netdev(ign->fb_tunnel_dev)))
1327 goto err_reg_dev;
1328
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001329 return 0;
1330
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001331err_reg_dev:
1332 free_netdev(ign->fb_tunnel_dev);
1333err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001334 return err;
1335}
1336
1337static void ipgre_exit_net(struct net *net)
1338{
1339 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001340 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001341
1342 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001343 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001344 ipgre_destroy_tunnels(ign, &list);
1345 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001346 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001347}
1348
1349static struct pernet_operations ipgre_net_ops = {
1350 .init = ipgre_init_net,
1351 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001352 .id = &ipgre_net_id,
1353 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001354};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355
Herbert Xuc19e6542008-10-09 11:59:55 -07001356static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357{
1358 __be16 flags;
1359
1360 if (!data)
1361 return 0;
1362
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1370
1371 return 0;
1372}
1373
Herbert Xue1a80002008-10-09 12:00:17 -07001374static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375{
1376 __be32 daddr;
1377
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1383 }
1384
1385 if (!data)
1386 goto out;
1387
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1392 }
1393
1394out:
1395 return ipgre_tunnel_validate(tb, data);
1396}
1397
Herbert Xuc19e6542008-10-09 11:59:55 -07001398static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms)
1400{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001401 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001402
1403 parms->iph.protocol = IPPROTO_GRE;
1404
1405 if (!data)
1406 return;
1407
1408 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410
1411 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413
1414 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416
1417 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419
1420 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422
1423 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001425
1426 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001428
1429 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431
1432 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF);
1437}
1438
Herbert Xue1a80002008-10-09 12:00:17 -07001439static int ipgre_tap_init(struct net_device *dev)
1440{
1441 struct ip_tunnel *tunnel;
1442
1443 tunnel = netdev_priv(dev);
1444
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1447
1448 ipgre_tunnel_bind_dev(dev);
1449
1450 return 0;
1451}
1452
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001453static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1460};
1461
Herbert Xue1a80002008-10-09 12:00:17 -07001462static void ipgre_tap_setup(struct net_device *dev)
1463{
1464
1465 ether_setup(dev);
1466
Herbert Xu2e9526b2009-10-30 05:51:48 +00001467 dev->netdev_ops = &ipgre_tap_netdev_ops;
Herbert Xue1a80002008-10-09 12:00:17 -07001468 dev->destructor = free_netdev;
Herbert Xue1a80002008-10-09 12:00:17 -07001469
1470 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL;
1472}
1473
Eric W. Biederman81adee42009-11-08 00:53:51 -08001474static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001475 struct nlattr *data[])
1476{
1477 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu;
1481 int err;
1482
1483 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms);
1485
Herbert Xue1a80002008-10-09 12:00:17 -07001486 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001487 return -EEXIST;
1488
Herbert Xue1a80002008-10-09 12:00:17 -07001489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr);
1491
Herbert Xuc19e6542008-10-09 11:59:55 -07001492 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu;
1495
1496 err = register_netdevice(dev);
1497 if (err)
1498 goto out;
1499
1500 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt);
1502
1503out:
1504 return err;
1505}
1506
1507static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[])
1509{
1510 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p;
1514 int mtu;
1515
1516 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL;
1518
1519 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p);
1521
1522 t = ipgre_tunnel_locate(net, &p, 0);
1523
1524 if (t) {
1525 if (t->dev != dev)
1526 return -EEXIST;
1527 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001528 t = nt;
1529
Herbert Xu2e9526b2009-10-30 05:51:48 +00001530 if (dev->type != ARPHRD_ETHER) {
1531 unsigned nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001532
Herbert Xu2e9526b2009-10-30 05:51:48 +00001533 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT;
1537
1538 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL;
1541 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001542
1543 ipgre_tunnel_unlink(ign, t);
1544 t->parms.iph.saddr = p.iph.saddr;
1545 t->parms.iph.daddr = p.iph.daddr;
1546 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001547 if (dev->type != ARPHRD_ETHER) {
1548 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549 memcpy(dev->broadcast, &p.iph.daddr, 4);
1550 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001551 ipgre_tunnel_link(ign, t);
1552 netdev_state_change(dev);
1553 }
1554
1555 t->parms.o_key = p.o_key;
1556 t->parms.iph.ttl = p.iph.ttl;
1557 t->parms.iph.tos = p.iph.tos;
1558 t->parms.iph.frag_off = p.iph.frag_off;
1559
1560 if (t->parms.link != p.link) {
1561 t->parms.link = p.link;
1562 mtu = ipgre_tunnel_bind_dev(dev);
1563 if (!tb[IFLA_MTU])
1564 dev->mtu = mtu;
1565 netdev_state_change(dev);
1566 }
1567
1568 return 0;
1569}
1570
/*
 * rtnetlink ->get_size handler: total attribute space needed for the
 * IFLA_GRE_* attributes listed below (one nla_total_size() per attribute,
 * sized by its payload in bytes). @dev is not consulted; the size is fixed.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	size_t len = 0;

	len += nla_total_size(4);	/* IFLA_GRE_LINK */
	len += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	len += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	len += nla_total_size(4);	/* IFLA_GRE_IKEY */
	len += nla_total_size(4);	/* IFLA_GRE_OKEY */
	len += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	len += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	len += nla_total_size(1);	/* IFLA_GRE_TTL */
	len += nla_total_size(1);	/* IFLA_GRE_TOS */
	len += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */

	return len;
}
1596
1597static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598{
1599 struct ip_tunnel *t = netdev_priv(dev);
1600 struct ip_tunnel_parm *p = &t->parms;
1601
1602 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
Patrick McHardyba9e64b2008-10-10 12:10:30 -07001605 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001607 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
Herbert Xuc19e6542008-10-09 11:59:55 -07001609 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612
1613 return 0;
1614
1615nla_put_failure:
1616 return -EMSGSIZE;
1617}
1618
1619static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1621 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1623 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001625 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001627 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1628 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1629 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1630};
1631
1632static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633 .kind = "gre",
1634 .maxtype = IFLA_GRE_MAX,
1635 .policy = ipgre_policy,
1636 .priv_size = sizeof(struct ip_tunnel),
1637 .setup = ipgre_tunnel_setup,
1638 .validate = ipgre_tunnel_validate,
1639 .newlink = ipgre_newlink,
1640 .changelink = ipgre_changelink,
1641 .get_size = ipgre_get_size,
1642 .fill_info = ipgre_fill_info,
1643};
1644
Herbert Xue1a80002008-10-09 12:00:17 -07001645static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646 .kind = "gretap",
1647 .maxtype = IFLA_GRE_MAX,
1648 .policy = ipgre_policy,
1649 .priv_size = sizeof(struct ip_tunnel),
1650 .setup = ipgre_tap_setup,
1651 .validate = ipgre_tap_validate,
1652 .newlink = ipgre_newlink,
1653 .changelink = ipgre_changelink,
1654 .get_size = ipgre_get_size,
1655 .fill_info = ipgre_fill_info,
1656};
1657
/*
 *	And now the module code and kernel interface.
 */
1661
1662static int __init ipgre_init(void)
1663{
1664 int err;
1665
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667
1668 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1669 printk(KERN_INFO "ipgre init: can't add protocol\n");
1670 return -EAGAIN;
1671 }
1672
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001673 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001674 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001675 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001676
Herbert Xuc19e6542008-10-09 11:59:55 -07001677 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0)
1679 goto rtnl_link_failed;
1680
Herbert Xue1a80002008-10-09 12:00:17 -07001681 err = rtnl_link_register(&ipgre_tap_ops);
1682 if (err < 0)
1683 goto tap_ops_failed;
1684
Herbert Xuc19e6542008-10-09 11:59:55 -07001685out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001687
Herbert Xue1a80002008-10-09 12:00:17 -07001688tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001690rtnl_link_failed:
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001691 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001692gen_device_failed:
1693 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1694 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695}
1696
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001697static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698{
Herbert Xue1a80002008-10-09 12:00:17 -07001699 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001700 rtnl_link_unregister(&ipgre_link_ops);
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001701 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704}
1705
1706module_init(ipgre_init);
1707module_exit(ipgre_fini);
1708MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001709MODULE_ALIAS_RTNL_LINK("gre");
1710MODULE_ALIAS_RTNL_LINK("gretap");