blob: 74d4c515772edbd68ab2aaacb64923a72229a9cc [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
64 solution, but it supposes maintaining a new variable in ALL
65 skb, even if no tunneling is used.
66
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090067 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would be even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
122
123/* Fallback tunnel: no source, no destination, no key, no options */
124
125static int ipgre_fb_tunnel_init(struct net_device *dev);
126
/* Number of buckets in each of the tunnel hash tables. */
#define HASH_SIZE	16

/* Identifier handed to net_generic() to fetch this module's per-net data. */
static int ipgre_net_id;

/* GRE state kept separately for every network namespace. */
struct ipgre_net {
	/* Hash tables of configured tunnels, indexed as [priority][bucket]. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device of the fallback tunnel used when no other tunnel matches. */
	struct net_device *fb_tunnel_dev;
};
135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136/* Tunnel hash table */
137
138/*
139 4 hash tables:
140
141 3: (remote,local)
142 2: (remote,*)
143 1: (*,local)
144 0: (*,*)
145
146 We require exact key match i.e. if a key is present in packet
147 it will match only tunnel with the same key; if it is not present,
148 it will match only keyless tunnel.
149
150 All keyless packets, if not matched by configured keyless tunnels,
151 will match the fallback tunnel.
152 */
153
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The mask is derived from HASH_SIZE, which must therefore be a power of
 * two.  The argument is fully parenthesised so that compound expressions
 * (e.g. "a | b") hash correctly.  It is still expanded twice, so it must
 * not have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Named views of ipgre_net.tunnels[], one per match priority. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161static DEFINE_RWLOCK(ipgre_lock);
162
163/* Given src, dst and key, find appropriate for input tunnel. */
164
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700165static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166 __be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167{
168 unsigned h0 = HASH(remote);
169 unsigned h1 = HASH(key);
170 struct ip_tunnel *t;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700171 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700173 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176 return t;
177 }
178 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700179 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 if (remote == t->parms.iph.daddr) {
181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182 return t;
183 }
184 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700185 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800187 (local == t->parms.iph.daddr &&
188 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190 return t;
191 }
192 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700193 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195 return t;
196 }
197
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700198 if (ign->fb_tunnel_dev->flags&IFF_UP)
199 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 return NULL;
201}
202
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700203static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900206 __be32 remote = parms->iph.daddr;
207 __be32 local = parms->iph.saddr;
208 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 unsigned h = HASH(key);
210 int prio = 0;
211
212 if (local)
213 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800214 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 prio |= 2;
216 h ^= HASH(remote);
217 }
218
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700219 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220}
221
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700222static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900224{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700225 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900226}
227
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700228static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700230 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
232 t->next = *tp;
233 write_lock_bh(&ipgre_lock);
234 *tp = t;
235 write_unlock_bh(&ipgre_lock);
236}
237
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700238static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239{
240 struct ip_tunnel **tp;
241
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700242 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 if (t == *tp) {
244 write_lock_bh(&ipgre_lock);
245 *tp = t->next;
246 write_unlock_bh(&ipgre_lock);
247 break;
248 }
249 }
250}
251
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700252static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253 struct ip_tunnel_parm *parms, int create)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
Al Virod5a0a1e2006-11-08 00:23:14 -0800255 __be32 remote = parms->iph.daddr;
256 __be32 local = parms->iph.saddr;
257 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 struct ip_tunnel *t, **tp, *nt;
259 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700261 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700263 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265 if (key == t->parms.i_key)
266 return t;
267 }
268 }
269 if (!create)
270 return NULL;
271
272 if (parms->name[0])
273 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800274 else
275 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276
277 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278 if (!dev)
279 return NULL;
280
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800281 if (strchr(name, '%')) {
282 if (dev_alloc_name(dev, name) < 0)
283 goto failed_free;
284 }
285
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800287 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 nt->parms = *parms;
289
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800290 if (register_netdevice(dev) < 0)
291 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700294 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 return nt;
296
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800297failed_free:
298 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 return NULL;
300}
301
302static void ipgre_tunnel_uninit(struct net_device *dev)
303{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700304 struct net *net = dev_net(dev);
305 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
306
307 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 dev_put(dev);
309}
310
311
312static void ipgre_err(struct sk_buff *skb, u32 info)
313{
314#ifndef I_WISH_WORLD_WERE_PERFECT
315
316/* It is not :-( All the routers (except for Linux) return only
317 8 bytes of packet payload. It means, that precise relaying of
318 ICMP in the real Internet is absolutely infeasible.
319
320 Moreover, Cisco "wise men" put GRE key to the third word
321 in GRE header. It makes impossible maintaining even soft state for keyed
322 GRE tunnels with enabled checksum. Tell them "thank you".
323
324 Well, I wonder, rfc1812 was written by Cisco employee,
325 what the hell these idiots break standrads established
326 by themself???
327 */
328
329 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800330 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300332 const int type = icmp_hdr(skb)->type;
333 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800335 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
337 flags = p[0];
338 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
339 if (flags&(GRE_VERSION|GRE_ROUTING))
340 return;
341 if (flags&GRE_KEY) {
342 grehlen += 4;
343 if (flags&GRE_CSUM)
344 grehlen += 4;
345 }
346 }
347
348 /* If only 8 bytes returned, keyed message will be dropped here */
349 if (skb_headlen(skb) < grehlen)
350 return;
351
352 switch (type) {
353 default:
354 case ICMP_PARAMETERPROB:
355 return;
356
357 case ICMP_DEST_UNREACH:
358 switch (code) {
359 case ICMP_SR_FAILED:
360 case ICMP_PORT_UNREACH:
361 /* Impossible event. */
362 return;
363 case ICMP_FRAG_NEEDED:
364 /* Soft state for pmtu is maintained by IP core. */
365 return;
366 default:
367 /* All others are translated to HOST_UNREACH.
368 rfc2003 contains "deep thoughts" about NET_UNREACH,
369 I believe they are just ether pollution. --ANK
370 */
371 break;
372 }
373 break;
374 case ICMP_TIME_EXCEEDED:
375 if (code != ICMP_EXC_TTL)
376 return;
377 break;
378 }
379
380 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700381 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700382 (flags&GRE_KEY) ?
383 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800384 if (t == NULL || t->parms.iph.daddr == 0 ||
385 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 goto out;
387
388 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
389 goto out;
390
391 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
392 t->err_count++;
393 else
394 t->err_count = 1;
395 t->err_time = jiffies;
396out:
397 read_unlock(&ipgre_lock);
398 return;
399#else
400 struct iphdr *iph = (struct iphdr*)dp;
401 struct iphdr *eiph;
Al Virod5a0a1e2006-11-08 00:23:14 -0800402 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300403 const int type = icmp_hdr(skb)->type;
404 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 int rel_type = 0;
406 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700407 __be32 rel_info = 0;
408 __u32 n = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800409 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 int grehlen = (iph->ihl<<2) + 4;
411 struct sk_buff *skb2;
412 struct flowi fl;
413 struct rtable *rt;
414
415 if (p[1] != htons(ETH_P_IP))
416 return;
417
418 flags = p[0];
419 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
420 if (flags&(GRE_VERSION|GRE_ROUTING))
421 return;
422 if (flags&GRE_CSUM)
423 grehlen += 4;
424 if (flags&GRE_KEY)
425 grehlen += 4;
426 if (flags&GRE_SEQ)
427 grehlen += 4;
428 }
429 if (len < grehlen + sizeof(struct iphdr))
430 return;
431 eiph = (struct iphdr*)(dp + grehlen);
432
433 switch (type) {
434 default:
435 return;
436 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300437 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700438 if (n < (iph->ihl<<2))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 return;
440
441 /* So... This guy found something strange INSIDE encapsulated
442 packet. Well, he is fool, but what can we do ?
443 */
444 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700445 n -= grehlen;
446 rel_info = htonl(n << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 break;
448
449 case ICMP_DEST_UNREACH:
450 switch (code) {
451 case ICMP_SR_FAILED:
452 case ICMP_PORT_UNREACH:
453 /* Impossible event. */
454 return;
455 case ICMP_FRAG_NEEDED:
456 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300457 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700458 if (n < grehlen+68)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700460 n -= grehlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700462 if (n > ntohs(eiph->tot_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700464 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 break;
466 default:
467 /* All others are translated to HOST_UNREACH.
468 rfc2003 contains "deep thoughts" about NET_UNREACH,
469 I believe, it is just ether pollution. --ANK
470 */
471 rel_type = ICMP_DEST_UNREACH;
472 rel_code = ICMP_HOST_UNREACH;
473 break;
474 }
475 break;
476 case ICMP_TIME_EXCEEDED:
477 if (code != ICMP_EXC_TTL)
478 return;
479 break;
480 }
481
482 /* Prepare fake skb to feed it to icmp_send */
483 skb2 = skb_clone(skb, GFP_ATOMIC);
484 if (skb2 == NULL)
485 return;
486 dst_release(skb2->dst);
487 skb2->dst = NULL;
488 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700489 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
491 /* Try to guess incoming interface */
492 memset(&fl, 0, sizeof(fl));
493 fl.fl4_dst = eiph->saddr;
494 fl.fl4_tos = RT_TOS(eiph->tos);
495 fl.proto = IPPROTO_GRE;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800496 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 kfree_skb(skb2);
498 return;
499 }
500 skb2->dev = rt->u.dst.dev;
501
502 /* route "incoming" packet */
503 if (rt->rt_flags&RTCF_LOCAL) {
504 ip_rt_put(rt);
505 rt = NULL;
506 fl.fl4_dst = eiph->daddr;
507 fl.fl4_src = eiph->saddr;
508 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800509 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 rt->u.dst.dev->type != ARPHRD_IPGRE) {
511 ip_rt_put(rt);
512 kfree_skb(skb2);
513 return;
514 }
515 } else {
516 ip_rt_put(rt);
517 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
518 skb2->dst->dev->type != ARPHRD_IPGRE) {
519 kfree_skb(skb2);
520 return;
521 }
522 }
523
524 /* change mtu on this route */
525 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700526 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 kfree_skb(skb2);
528 return;
529 }
Al Viroc55e2f42006-09-19 13:23:19 -0700530 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800532 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 if (t->parms.iph.ttl) {
534 rel_type = ICMP_DEST_UNREACH;
535 rel_code = ICMP_HOST_UNREACH;
536 }
537 }
538
539 icmp_send(skb2, rel_type, rel_code, rel_info);
540 kfree_skb(skb2);
541#endif
542}
543
544static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
545{
546 if (INET_ECN_is_ce(iph->tos)) {
547 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700548 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700550 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 }
552 }
553}
554
555static inline u8
556ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
557{
558 u8 inner = 0;
559 if (skb->protocol == htons(ETH_P_IP))
560 inner = old_iph->tos;
561 else if (skb->protocol == htons(ETH_P_IPV6))
562 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
563 return INET_ECN_encapsulate(tos, inner);
564}
565
566static int ipgre_rcv(struct sk_buff *skb)
567{
568 struct iphdr *iph;
569 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800570 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800571 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800572 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 u32 seqno = 0;
574 struct ip_tunnel *tunnel;
575 int offset = 4;
576
577 if (!pskb_may_pull(skb, 16))
578 goto drop_nolock;
579
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700580 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800582 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583
584 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
585 /* - Version must be 0.
586 - We do not support routing headers.
587 */
588 if (flags&(GRE_VERSION|GRE_ROUTING))
589 goto drop_nolock;
590
591 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800592 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700593 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800594 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800595 if (!csum)
596 break;
597 /* fall through */
598 case CHECKSUM_NONE:
599 skb->csum = 0;
600 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700601 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 }
603 offset += 4;
604 }
605 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800606 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 offset += 4;
608 }
609 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800610 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 offset += 4;
612 }
613 }
614
615 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700616 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700617 iph->saddr, iph->daddr, key)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 secpath_reset(skb);
619
Al Virod5a0a1e2006-11-08 00:23:14 -0800620 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 /* WCCP version 1 and 2 protocol decoding.
622 * - Change protocol to IP
623 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
624 */
625 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700626 skb->protocol == htons(ETH_P_WCCP)) {
627 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900628 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 offset += 4;
630 }
631
Timo Teras1d069162007-12-20 00:10:33 -0800632 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300633 __pskb_pull(skb, offset);
634 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700635 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 skb->pkt_type = PACKET_HOST;
637#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800638 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800640 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 goto drop;
642 tunnel->stat.multicast++;
643 skb->pkt_type = PACKET_BROADCAST;
644 }
645#endif
646
647 if (((flags&GRE_CSUM) && csum) ||
648 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
649 tunnel->stat.rx_crc_errors++;
650 tunnel->stat.rx_errors++;
651 goto drop;
652 }
653 if (tunnel->parms.i_flags&GRE_SEQ) {
654 if (!(flags&GRE_SEQ) ||
655 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
656 tunnel->stat.rx_fifo_errors++;
657 tunnel->stat.rx_errors++;
658 goto drop;
659 }
660 tunnel->i_seqno = seqno + 1;
661 }
662 tunnel->stat.rx_packets++;
663 tunnel->stat.rx_bytes += skb->len;
664 skb->dev = tunnel->dev;
665 dst_release(skb->dst);
666 skb->dst = NULL;
667 nf_reset(skb);
668 ipgre_ecn_decapsulate(iph, skb);
669 netif_rx(skb);
670 read_unlock(&ipgre_lock);
671 return(0);
672 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700673 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675drop:
676 read_unlock(&ipgre_lock);
677drop_nolock:
678 kfree_skb(skb);
679 return(0);
680}
681
682static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
683{
Patrick McHardy2941a482006-01-08 22:05:26 -0800684 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 struct net_device_stats *stats = &tunnel->stat;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700686 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 struct iphdr *tiph;
688 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800689 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 struct rtable *rt; /* Route to the other host */
691 struct net_device *tdev; /* Device to other host */
692 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700693 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800695 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 int mtu;
697
698 if (tunnel->recursion++) {
699 tunnel->stat.collisions++;
700 goto tx_error;
701 }
702
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700703 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 gre_hlen = 0;
705 tiph = (struct iphdr*)skb->data;
706 } else {
707 gre_hlen = tunnel->hlen;
708 tiph = &tunnel->parms.iph;
709 }
710
711 if ((dst = tiph->daddr) == 0) {
712 /* NBMA tunnel */
713
714 if (skb->dst == NULL) {
715 tunnel->stat.tx_fifo_errors++;
716 goto tx_error;
717 }
718
719 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800720 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 if ((dst = rt->rt_gateway) == 0)
722 goto tx_error_icmp;
723 }
724#ifdef CONFIG_IPV6
725 else if (skb->protocol == htons(ETH_P_IPV6)) {
726 struct in6_addr *addr6;
727 int addr_type;
728 struct neighbour *neigh = skb->dst->neighbour;
729
730 if (neigh == NULL)
731 goto tx_error;
732
733 addr6 = (struct in6_addr*)&neigh->primary_key;
734 addr_type = ipv6_addr_type(addr6);
735
736 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700737 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 addr_type = ipv6_addr_type(addr6);
739 }
740
741 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
742 goto tx_error_icmp;
743
744 dst = addr6->s6_addr32[3];
745 }
746#endif
747 else
748 goto tx_error;
749 }
750
751 tos = tiph->tos;
752 if (tos&1) {
753 if (skb->protocol == htons(ETH_P_IP))
754 tos = old_iph->tos;
755 tos &= ~1;
756 }
757
758 {
759 struct flowi fl = { .oif = tunnel->parms.link,
760 .nl_u = { .ip4_u =
761 { .daddr = dst,
762 .saddr = tiph->saddr,
763 .tos = RT_TOS(tos) } },
764 .proto = IPPROTO_GRE };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800765 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 tunnel->stat.tx_carrier_errors++;
767 goto tx_error;
768 }
769 }
770 tdev = rt->u.dst.dev;
771
772 if (tdev == dev) {
773 ip_rt_put(rt);
774 tunnel->stat.collisions++;
775 goto tx_error;
776 }
777
778 df = tiph->frag_off;
779 if (df)
780 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
781 else
782 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
783
784 if (skb->dst)
785 skb->dst->ops->update_pmtu(skb->dst, mtu);
786
787 if (skb->protocol == htons(ETH_P_IP)) {
788 df |= (old_iph->frag_off&htons(IP_DF));
789
790 if ((old_iph->frag_off&htons(IP_DF)) &&
791 mtu < ntohs(old_iph->tot_len)) {
792 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
793 ip_rt_put(rt);
794 goto tx_error;
795 }
796 }
797#ifdef CONFIG_IPV6
798 else if (skb->protocol == htons(ETH_P_IPV6)) {
799 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
800
801 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800802 if ((tunnel->parms.iph.daddr &&
803 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 rt6->rt6i_dst.plen == 128) {
805 rt6->rt6i_flags |= RTF_MODIFIED;
806 skb->dst->metrics[RTAX_MTU-1] = mtu;
807 }
808 }
809
810 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
811 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
812 ip_rt_put(rt);
813 goto tx_error;
814 }
815 }
816#endif
817
818 if (tunnel->err_count > 0) {
819 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
820 tunnel->err_count--;
821
822 dst_link_failure(skb);
823 } else
824 tunnel->err_count = 0;
825 }
826
827 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
828
Patrick McHardycfbba492007-07-09 15:33:40 -0700829 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
830 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
832 if (!new_skb) {
833 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900834 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 dev_kfree_skb(skb);
836 tunnel->recursion--;
837 return 0;
838 }
839 if (skb->sk)
840 skb_set_owner_w(new_skb, skb->sk);
841 dev_kfree_skb(skb);
842 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700843 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 }
845
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700846 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700847 skb_push(skb, gre_hlen);
848 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800850 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
851 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 dst_release(skb->dst);
853 skb->dst = &rt->u.dst;
854
855 /*
856 * Push down and install the IPIP header.
857 */
858
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700859 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 iph->version = 4;
861 iph->ihl = sizeof(struct iphdr) >> 2;
862 iph->frag_off = df;
863 iph->protocol = IPPROTO_GRE;
864 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
865 iph->daddr = rt->rt_dst;
866 iph->saddr = rt->rt_src;
867
868 if ((iph->ttl = tiph->ttl) == 0) {
869 if (skb->protocol == htons(ETH_P_IP))
870 iph->ttl = old_iph->ttl;
871#ifdef CONFIG_IPV6
872 else if (skb->protocol == htons(ETH_P_IPV6))
873 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
874#endif
875 else
876 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
877 }
878
Al Virod5a0a1e2006-11-08 00:23:14 -0800879 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
880 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
882 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800883 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885 if (tunnel->parms.o_flags&GRE_SEQ) {
886 ++tunnel->o_seqno;
887 *ptr = htonl(tunnel->o_seqno);
888 ptr--;
889 }
890 if (tunnel->parms.o_flags&GRE_KEY) {
891 *ptr = tunnel->parms.o_key;
892 ptr--;
893 }
894 if (tunnel->parms.o_flags&GRE_CSUM) {
895 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800896 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 }
898 }
899
900 nf_reset(skb);
901
902 IPTUNNEL_XMIT();
903 tunnel->recursion--;
904 return 0;
905
906tx_error_icmp:
907 dst_link_failure(skb);
908
909tx_error:
910 stats->tx_errors++;
911 dev_kfree_skb(skb);
912 tunnel->recursion--;
913 return 0;
914}
915
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800916static void ipgre_tunnel_bind_dev(struct net_device *dev)
917{
918 struct net_device *tdev = NULL;
919 struct ip_tunnel *tunnel;
920 struct iphdr *iph;
921 int hlen = LL_MAX_HEADER;
922 int mtu = ETH_DATA_LEN;
923 int addend = sizeof(struct iphdr) + 4;
924
925 tunnel = netdev_priv(dev);
926 iph = &tunnel->parms.iph;
927
928 /* Guess output device to choose reasonable mtu and hard_header_len */
929
930 if (iph->daddr) {
931 struct flowi fl = { .oif = tunnel->parms.link,
932 .nl_u = { .ip4_u =
933 { .daddr = iph->daddr,
934 .saddr = iph->saddr,
935 .tos = RT_TOS(iph->tos) } },
936 .proto = IPPROTO_GRE };
937 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800938 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800939 tdev = rt->u.dst.dev;
940 ip_rt_put(rt);
941 }
942 dev->flags |= IFF_POINTOPOINT;
943 }
944
945 if (!tdev && tunnel->parms.link)
946 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
947
948 if (tdev) {
949 hlen = tdev->hard_header_len;
950 mtu = tdev->mtu;
951 }
952 dev->iflink = tunnel->parms.link;
953
954 /* Precalculate GRE options length */
955 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
956 if (tunnel->parms.o_flags&GRE_CSUM)
957 addend += 4;
958 if (tunnel->parms.o_flags&GRE_KEY)
959 addend += 4;
960 if (tunnel->parms.o_flags&GRE_SEQ)
961 addend += 4;
962 }
963 dev->hard_header_len = hlen + addend;
964 dev->mtu = mtu - addend;
965 tunnel->hlen = addend;
966
967}
968
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969static int
970ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
971{
972 int err = 0;
973 struct ip_tunnel_parm p;
974 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700975 struct net *net = dev_net(dev);
976 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977
978 switch (cmd) {
979 case SIOCGETTUNNEL:
980 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700981 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
983 err = -EFAULT;
984 break;
985 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700986 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 }
988 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800989 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 memcpy(&p, &t->parms, sizeof(p));
991 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
992 err = -EFAULT;
993 break;
994
995 case SIOCADDTUNNEL:
996 case SIOCCHGTUNNEL:
997 err = -EPERM;
998 if (!capable(CAP_NET_ADMIN))
999 goto done;
1000
1001 err = -EFAULT;
1002 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1003 goto done;
1004
1005 err = -EINVAL;
1006 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1007 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1008 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1009 goto done;
1010 if (p.iph.ttl)
1011 p.iph.frag_off |= htons(IP_DF);
1012
1013 if (!(p.i_flags&GRE_KEY))
1014 p.i_key = 0;
1015 if (!(p.o_flags&GRE_KEY))
1016 p.o_key = 0;
1017
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001018 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001020 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 if (t != NULL) {
1022 if (t->dev != dev) {
1023 err = -EEXIST;
1024 break;
1025 }
1026 } else {
1027 unsigned nflags=0;
1028
Patrick McHardy2941a482006-01-08 22:05:26 -08001029 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
Joe Perchesf97c1e02007-12-16 13:45:43 -08001031 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 nflags = IFF_BROADCAST;
1033 else if (p.iph.daddr)
1034 nflags = IFF_POINTOPOINT;
1035
1036 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1037 err = -EINVAL;
1038 break;
1039 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001040 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 t->parms.iph.saddr = p.iph.saddr;
1042 t->parms.iph.daddr = p.iph.daddr;
1043 t->parms.i_key = p.i_key;
1044 t->parms.o_key = p.o_key;
1045 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1046 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001047 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 netdev_state_change(dev);
1049 }
1050 }
1051
1052 if (t) {
1053 err = 0;
1054 if (cmd == SIOCCHGTUNNEL) {
1055 t->parms.iph.ttl = p.iph.ttl;
1056 t->parms.iph.tos = p.iph.tos;
1057 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001058 if (t->parms.link != p.link) {
1059 t->parms.link = p.link;
1060 ipgre_tunnel_bind_dev(dev);
1061 netdev_state_change(dev);
1062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 }
1064 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1065 err = -EFAULT;
1066 } else
1067 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1068 break;
1069
1070 case SIOCDELTUNNEL:
1071 err = -EPERM;
1072 if (!capable(CAP_NET_ADMIN))
1073 goto done;
1074
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001075 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 err = -EFAULT;
1077 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1078 goto done;
1079 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001080 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 goto done;
1082 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001083 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 goto done;
1085 dev = t->dev;
1086 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001087 unregister_netdevice(dev);
1088 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 break;
1090
1091 default:
1092 err = -EINVAL;
1093 }
1094
1095done:
1096 return err;
1097}
1098
1099static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1100{
Patrick McHardy2941a482006-01-08 22:05:26 -08001101 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102}
1103
1104static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1105{
Patrick McHardy2941a482006-01-08 22:05:26 -08001106 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1108 return -EINVAL;
1109 dev->mtu = new_mtu;
1110 return 0;
1111}
1112
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113/* Nice toy. Unfortunately, useless in real life :-)
1114 It allows to construct virtual multiprotocol broadcast "LAN"
1115 over the Internet, provided multicast routing is tuned.
1116
1117
1118 I have no idea was this bicycle invented before me,
1119 so that I had to set ARPHRD_IPGRE to a random value.
1120 I have an impression, that Cisco could make something similar,
1121 but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001122
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1124 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1125
1126 ping -t 255 224.66.66.66
1127
1128 If nobody answers, mbone does not work.
1129
1130 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1131 ip addr add 10.66.66.<somewhat>/24 dev Universe
1132 ifconfig Universe up
1133 ifconfig Universe add fe80::<Your_real_addr>/10
1134 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1135 ftp 10.66.66.66
1136 ...
1137 ftp fec0:6666:6666::193.233.7.65
1138 ...
1139
1140 */
1141
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001142static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1143 unsigned short type,
1144 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145{
Patrick McHardy2941a482006-01-08 22:05:26 -08001146 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001148 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149
1150 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1151 p[0] = t->parms.o_flags;
1152 p[1] = htons(type);
1153
1154 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001155 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001157
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 if (saddr)
1159 memcpy(&iph->saddr, saddr, 4);
1160
1161 if (daddr) {
1162 memcpy(&iph->daddr, daddr, 4);
1163 return t->hlen;
1164 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001165 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 return -t->hlen;
1169}
1170
Timo Teras6a5f44d2007-10-23 20:31:53 -07001171static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1172{
1173 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1174 memcpy(haddr, &iph->saddr, 4);
1175 return 4;
1176}
1177
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001178static const struct header_ops ipgre_header_ops = {
1179 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001180 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001181};
1182
Timo Teras6a5f44d2007-10-23 20:31:53 -07001183#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184static int ipgre_open(struct net_device *dev)
1185{
Patrick McHardy2941a482006-01-08 22:05:26 -08001186 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
Joe Perchesf97c1e02007-12-16 13:45:43 -08001188 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 struct flowi fl = { .oif = t->parms.link,
1190 .nl_u = { .ip4_u =
1191 { .daddr = t->parms.iph.daddr,
1192 .saddr = t->parms.iph.saddr,
1193 .tos = RT_TOS(t->parms.iph.tos) } },
1194 .proto = IPPROTO_GRE };
1195 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -08001196 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 return -EADDRNOTAVAIL;
1198 dev = rt->u.dst.dev;
1199 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001200 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 return -EADDRNOTAVAIL;
1202 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001203 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
1205 return 0;
1206}
1207
1208static int ipgre_close(struct net_device *dev)
1209{
Patrick McHardy2941a482006-01-08 22:05:26 -08001210 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001211 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001212 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001213 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 if (in_dev) {
1215 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1216 in_dev_put(in_dev);
1217 }
1218 }
1219 return 0;
1220}
1221
1222#endif
1223
1224static void ipgre_tunnel_setup(struct net_device *dev)
1225{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 dev->uninit = ipgre_tunnel_uninit;
1227 dev->destructor = free_netdev;
1228 dev->hard_start_xmit = ipgre_tunnel_xmit;
1229 dev->get_stats = ipgre_tunnel_get_stats;
1230 dev->do_ioctl = ipgre_tunnel_ioctl;
1231 dev->change_mtu = ipgre_tunnel_change_mtu;
1232
1233 dev->type = ARPHRD_IPGRE;
1234 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001235 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 dev->flags = IFF_NOARP;
1237 dev->iflink = 0;
1238 dev->addr_len = 4;
1239}
1240
1241static int ipgre_tunnel_init(struct net_device *dev)
1242{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 struct ip_tunnel *tunnel;
1244 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
Patrick McHardy2941a482006-01-08 22:05:26 -08001246 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 iph = &tunnel->parms.iph;
1248
1249 tunnel->dev = dev;
1250 strcpy(tunnel->parms.name, dev->name);
1251
1252 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1253 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1254
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001255 ipgre_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001259 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 if (!iph->saddr)
1261 return -EINVAL;
1262 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001263 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 dev->open = ipgre_open;
1265 dev->stop = ipgre_close;
1266 }
1267#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001268 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001269 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 return 0;
1272}
1273
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001274static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275{
Patrick McHardy2941a482006-01-08 22:05:26 -08001276 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001278 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
1280 tunnel->dev = dev;
1281 strcpy(tunnel->parms.name, dev->name);
1282
1283 iph->version = 4;
1284 iph->protocol = IPPROTO_GRE;
1285 iph->ihl = 5;
1286 tunnel->hlen = sizeof(struct iphdr) + 4;
1287
1288 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001289 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 return 0;
1291}
1292
1293
1294static struct net_protocol ipgre_protocol = {
1295 .handler = ipgre_rcv,
1296 .err_handler = ipgre_err,
1297};
1298
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001299static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1300{
1301 int prio;
1302
1303 for (prio = 0; prio < 4; prio++) {
1304 int h;
1305 for (h = 0; h < HASH_SIZE; h++) {
1306 struct ip_tunnel *t;
1307 while ((t = ign->tunnels[prio][h]) != NULL)
1308 unregister_netdevice(t->dev);
1309 }
1310 }
1311}
1312
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001313static int ipgre_init_net(struct net *net)
1314{
1315 int err;
1316 struct ipgre_net *ign;
1317
1318 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001319 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001320 if (ign == NULL)
1321 goto err_alloc;
1322
1323 err = net_assign_generic(net, ipgre_net_id, ign);
1324 if (err < 0)
1325 goto err_assign;
1326
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001327 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1328 ipgre_tunnel_setup);
1329 if (!ign->fb_tunnel_dev) {
1330 err = -ENOMEM;
1331 goto err_alloc_dev;
1332 }
1333
1334 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1335 dev_net_set(ign->fb_tunnel_dev, net);
1336
1337 if ((err = register_netdev(ign->fb_tunnel_dev)))
1338 goto err_reg_dev;
1339
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001340 return 0;
1341
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001342err_reg_dev:
1343 free_netdev(ign->fb_tunnel_dev);
1344err_alloc_dev:
1345 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001346err_assign:
1347 kfree(ign);
1348err_alloc:
1349 return err;
1350}
1351
1352static void ipgre_exit_net(struct net *net)
1353{
1354 struct ipgre_net *ign;
1355
1356 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001357 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001358 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001359 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001360 kfree(ign);
1361}
1362
1363static struct pernet_operations ipgre_net_ops = {
1364 .init = ipgre_init_net,
1365 .exit = ipgre_exit_net,
1366};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
1368/*
1369 * And now the modules code and kernel interface.
1370 */
1371
1372static int __init ipgre_init(void)
1373{
1374 int err;
1375
1376 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1377
1378 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1379 printk(KERN_INFO "ipgre init: can't add protocol\n");
1380 return -EAGAIN;
1381 }
1382
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001383 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1384 if (err < 0)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001385 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1386
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388}
1389
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001390static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391{
1392 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1393 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1394
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001395 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396}
1397
1398module_init(ipgre_init);
1399module_exit(ipgre_fini);
1400MODULE_LICENSE("GPL");