blob: a8ec0904e5a6c5a89b43273baeee272771a3f249 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skb, even if no tunneling is used.
66
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090067 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
/*
 * Forward declarations of per-device hooks.  ipgre_tunnel_init is installed
 * as dev->init and ipgre_tunnel_setup is handed to alloc_netdev() by
 * ipgre_tunnel_locate().
 */
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_init(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */
static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Id passed to net_generic() to fetch this file's per-namespace state. */
static int ipgre_net_id;

/* GRE state kept per network namespace. */
struct ipgre_net {
	/* device returned by ipgre_tunnel_lookup() when nothing else matches */
	struct net_device *fb_tunnel_dev;
};
131
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132/* Tunnel hash table */
133
134/*
135 4 hash tables:
136
137 3: (remote,local)
138 2: (remote,*)
139 1: (*,local)
140 0: (*,*)
141
   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
148 */
149
/*
 * Number of buckets in each tunnel hash table.  Must stay a power of two,
 * because HASH() masks with HASH_SIZE - 1.
 */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesised so that a compound expression is cast and
 * shifted as a whole.  It is expanded twice, so it must not have side
 * effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
153static struct ip_tunnel *tunnels[4][HASH_SIZE];
154
155#define tunnels_r_l (tunnels[3])
156#define tunnels_r (tunnels[2])
157#define tunnels_l (tunnels[1])
158#define tunnels_wc (tunnels[0])
159
160static DEFINE_RWLOCK(ipgre_lock);
161
162/* Given src, dst and key, find appropriate for input tunnel. */
163
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700164static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
165 __be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166{
167 unsigned h0 = HASH(remote);
168 unsigned h1 = HASH(key);
169 struct ip_tunnel *t;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700170 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171
172 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
173 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
174 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
175 return t;
176 }
177 }
178 for (t = tunnels_r[h0^h1]; t; t = t->next) {
179 if (remote == t->parms.iph.daddr) {
180 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
181 return t;
182 }
183 }
184 for (t = tunnels_l[h1]; t; t = t->next) {
185 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800186 (local == t->parms.iph.daddr &&
187 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
189 return t;
190 }
191 }
192 for (t = tunnels_wc[h1]; t; t = t->next) {
193 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
194 return t;
195 }
196
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700197 if (ign->fb_tunnel_dev->flags&IFF_UP)
198 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 return NULL;
200}
201
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700202static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
203 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900205 __be32 remote = parms->iph.daddr;
206 __be32 local = parms->iph.saddr;
207 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 unsigned h = HASH(key);
209 int prio = 0;
210
211 if (local)
212 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800213 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 prio |= 2;
215 h ^= HASH(remote);
216 }
217
218 return &tunnels[prio][h];
219}
220
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700221static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
222 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900223{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700224 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900225}
226
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700227static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700229 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230
231 t->next = *tp;
232 write_lock_bh(&ipgre_lock);
233 *tp = t;
234 write_unlock_bh(&ipgre_lock);
235}
236
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700237static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238{
239 struct ip_tunnel **tp;
240
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700241 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 if (t == *tp) {
243 write_lock_bh(&ipgre_lock);
244 *tp = t->next;
245 write_unlock_bh(&ipgre_lock);
246 break;
247 }
248 }
249}
250
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700251static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
252 struct ip_tunnel_parm *parms, int create)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
Al Virod5a0a1e2006-11-08 00:23:14 -0800254 __be32 remote = parms->iph.daddr;
255 __be32 local = parms->iph.saddr;
256 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 struct ip_tunnel *t, **tp, *nt;
258 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700260 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700262 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
264 if (key == t->parms.i_key)
265 return t;
266 }
267 }
268 if (!create)
269 return NULL;
270
271 if (parms->name[0])
272 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800273 else
274 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275
276 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
277 if (!dev)
278 return NULL;
279
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800280 if (strchr(name, '%')) {
281 if (dev_alloc_name(dev, name) < 0)
282 goto failed_free;
283 }
284
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800286 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 nt->parms = *parms;
288
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800289 if (register_netdevice(dev) < 0)
290 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700293 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 return nt;
295
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800296failed_free:
297 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 return NULL;
299}
300
301static void ipgre_tunnel_uninit(struct net_device *dev)
302{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700303 struct net *net = dev_net(dev);
304 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
305
306 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 dev_put(dev);
308}
309
310
311static void ipgre_err(struct sk_buff *skb, u32 info)
312{
313#ifndef I_WISH_WORLD_WERE_PERFECT
314
315/* It is not :-( All the routers (except for Linux) return only
316 8 bytes of packet payload. It means, that precise relaying of
317 ICMP in the real Internet is absolutely infeasible.
318
319 Moreover, Cisco "wise men" put GRE key to the third word
320 in GRE header. It makes impossible maintaining even soft state for keyed
321 GRE tunnels with enabled checksum. Tell them "thank you".
322
323 Well, I wonder, rfc1812 was written by Cisco employee,
324 what the hell these idiots break standrads established
325 by themself???
326 */
327
328 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800329 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300331 const int type = icmp_hdr(skb)->type;
332 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800334 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335
336 flags = p[0];
337 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
338 if (flags&(GRE_VERSION|GRE_ROUTING))
339 return;
340 if (flags&GRE_KEY) {
341 grehlen += 4;
342 if (flags&GRE_CSUM)
343 grehlen += 4;
344 }
345 }
346
347 /* If only 8 bytes returned, keyed message will be dropped here */
348 if (skb_headlen(skb) < grehlen)
349 return;
350
351 switch (type) {
352 default:
353 case ICMP_PARAMETERPROB:
354 return;
355
356 case ICMP_DEST_UNREACH:
357 switch (code) {
358 case ICMP_SR_FAILED:
359 case ICMP_PORT_UNREACH:
360 /* Impossible event. */
361 return;
362 case ICMP_FRAG_NEEDED:
363 /* Soft state for pmtu is maintained by IP core. */
364 return;
365 default:
366 /* All others are translated to HOST_UNREACH.
367 rfc2003 contains "deep thoughts" about NET_UNREACH,
368 I believe they are just ether pollution. --ANK
369 */
370 break;
371 }
372 break;
373 case ICMP_TIME_EXCEEDED:
374 if (code != ICMP_EXC_TTL)
375 return;
376 break;
377 }
378
379 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700380 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700381 (flags&GRE_KEY) ?
382 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800383 if (t == NULL || t->parms.iph.daddr == 0 ||
384 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 goto out;
386
387 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
388 goto out;
389
390 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
391 t->err_count++;
392 else
393 t->err_count = 1;
394 t->err_time = jiffies;
395out:
396 read_unlock(&ipgre_lock);
397 return;
398#else
399 struct iphdr *iph = (struct iphdr*)dp;
400 struct iphdr *eiph;
Al Virod5a0a1e2006-11-08 00:23:14 -0800401 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300402 const int type = icmp_hdr(skb)->type;
403 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 int rel_type = 0;
405 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700406 __be32 rel_info = 0;
407 __u32 n = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800408 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 int grehlen = (iph->ihl<<2) + 4;
410 struct sk_buff *skb2;
411 struct flowi fl;
412 struct rtable *rt;
413
414 if (p[1] != htons(ETH_P_IP))
415 return;
416
417 flags = p[0];
418 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
419 if (flags&(GRE_VERSION|GRE_ROUTING))
420 return;
421 if (flags&GRE_CSUM)
422 grehlen += 4;
423 if (flags&GRE_KEY)
424 grehlen += 4;
425 if (flags&GRE_SEQ)
426 grehlen += 4;
427 }
428 if (len < grehlen + sizeof(struct iphdr))
429 return;
430 eiph = (struct iphdr*)(dp + grehlen);
431
432 switch (type) {
433 default:
434 return;
435 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300436 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700437 if (n < (iph->ihl<<2))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return;
439
440 /* So... This guy found something strange INSIDE encapsulated
441 packet. Well, he is fool, but what can we do ?
442 */
443 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700444 n -= grehlen;
445 rel_info = htonl(n << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 break;
447
448 case ICMP_DEST_UNREACH:
449 switch (code) {
450 case ICMP_SR_FAILED:
451 case ICMP_PORT_UNREACH:
452 /* Impossible event. */
453 return;
454 case ICMP_FRAG_NEEDED:
455 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300456 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700457 if (n < grehlen+68)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700459 n -= grehlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700461 if (n > ntohs(eiph->tot_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700463 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 break;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe, it is just ether pollution. --ANK
469 */
470 rel_type = ICMP_DEST_UNREACH;
471 rel_code = ICMP_HOST_UNREACH;
472 break;
473 }
474 break;
475 case ICMP_TIME_EXCEEDED:
476 if (code != ICMP_EXC_TTL)
477 return;
478 break;
479 }
480
481 /* Prepare fake skb to feed it to icmp_send */
482 skb2 = skb_clone(skb, GFP_ATOMIC);
483 if (skb2 == NULL)
484 return;
485 dst_release(skb2->dst);
486 skb2->dst = NULL;
487 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700488 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489
490 /* Try to guess incoming interface */
491 memset(&fl, 0, sizeof(fl));
492 fl.fl4_dst = eiph->saddr;
493 fl.fl4_tos = RT_TOS(eiph->tos);
494 fl.proto = IPPROTO_GRE;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800495 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 kfree_skb(skb2);
497 return;
498 }
499 skb2->dev = rt->u.dst.dev;
500
501 /* route "incoming" packet */
502 if (rt->rt_flags&RTCF_LOCAL) {
503 ip_rt_put(rt);
504 rt = NULL;
505 fl.fl4_dst = eiph->daddr;
506 fl.fl4_src = eiph->saddr;
507 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800508 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 rt->u.dst.dev->type != ARPHRD_IPGRE) {
510 ip_rt_put(rt);
511 kfree_skb(skb2);
512 return;
513 }
514 } else {
515 ip_rt_put(rt);
516 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
517 skb2->dst->dev->type != ARPHRD_IPGRE) {
518 kfree_skb(skb2);
519 return;
520 }
521 }
522
523 /* change mtu on this route */
524 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700525 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 kfree_skb(skb2);
527 return;
528 }
Al Viroc55e2f42006-09-19 13:23:19 -0700529 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800531 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 if (t->parms.iph.ttl) {
533 rel_type = ICMP_DEST_UNREACH;
534 rel_code = ICMP_HOST_UNREACH;
535 }
536 }
537
538 icmp_send(skb2, rel_type, rel_code, rel_info);
539 kfree_skb(skb2);
540#endif
541}
542
543static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
544{
545 if (INET_ECN_is_ce(iph->tos)) {
546 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700547 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700549 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 }
551 }
552}
553
554static inline u8
555ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
556{
557 u8 inner = 0;
558 if (skb->protocol == htons(ETH_P_IP))
559 inner = old_iph->tos;
560 else if (skb->protocol == htons(ETH_P_IPV6))
561 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
562 return INET_ECN_encapsulate(tos, inner);
563}
564
565static int ipgre_rcv(struct sk_buff *skb)
566{
567 struct iphdr *iph;
568 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800569 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800570 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800571 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 u32 seqno = 0;
573 struct ip_tunnel *tunnel;
574 int offset = 4;
575
576 if (!pskb_may_pull(skb, 16))
577 goto drop_nolock;
578
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700579 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800581 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582
583 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
584 /* - Version must be 0.
585 - We do not support routing headers.
586 */
587 if (flags&(GRE_VERSION|GRE_ROUTING))
588 goto drop_nolock;
589
590 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800591 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700592 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800593 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800594 if (!csum)
595 break;
596 /* fall through */
597 case CHECKSUM_NONE:
598 skb->csum = 0;
599 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700600 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 }
602 offset += 4;
603 }
604 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800605 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 offset += 4;
607 }
608 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800609 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 offset += 4;
611 }
612 }
613
614 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700615 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700616 iph->saddr, iph->daddr, key)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 secpath_reset(skb);
618
Al Virod5a0a1e2006-11-08 00:23:14 -0800619 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 /* WCCP version 1 and 2 protocol decoding.
621 * - Change protocol to IP
622 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
623 */
624 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700625 skb->protocol == htons(ETH_P_WCCP)) {
626 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900627 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 offset += 4;
629 }
630
Timo Teras1d069162007-12-20 00:10:33 -0800631 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300632 __pskb_pull(skb, offset);
633 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700634 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 skb->pkt_type = PACKET_HOST;
636#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800637 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800639 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 goto drop;
641 tunnel->stat.multicast++;
642 skb->pkt_type = PACKET_BROADCAST;
643 }
644#endif
645
646 if (((flags&GRE_CSUM) && csum) ||
647 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
648 tunnel->stat.rx_crc_errors++;
649 tunnel->stat.rx_errors++;
650 goto drop;
651 }
652 if (tunnel->parms.i_flags&GRE_SEQ) {
653 if (!(flags&GRE_SEQ) ||
654 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
655 tunnel->stat.rx_fifo_errors++;
656 tunnel->stat.rx_errors++;
657 goto drop;
658 }
659 tunnel->i_seqno = seqno + 1;
660 }
661 tunnel->stat.rx_packets++;
662 tunnel->stat.rx_bytes += skb->len;
663 skb->dev = tunnel->dev;
664 dst_release(skb->dst);
665 skb->dst = NULL;
666 nf_reset(skb);
667 ipgre_ecn_decapsulate(iph, skb);
668 netif_rx(skb);
669 read_unlock(&ipgre_lock);
670 return(0);
671 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700672 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673
674drop:
675 read_unlock(&ipgre_lock);
676drop_nolock:
677 kfree_skb(skb);
678 return(0);
679}
680
681static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
682{
Patrick McHardy2941a482006-01-08 22:05:26 -0800683 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 struct net_device_stats *stats = &tunnel->stat;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700685 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 struct iphdr *tiph;
687 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800688 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 struct rtable *rt; /* Route to the other host */
690 struct net_device *tdev; /* Device to other host */
691 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700692 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800694 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 int mtu;
696
697 if (tunnel->recursion++) {
698 tunnel->stat.collisions++;
699 goto tx_error;
700 }
701
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700702 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 gre_hlen = 0;
704 tiph = (struct iphdr*)skb->data;
705 } else {
706 gre_hlen = tunnel->hlen;
707 tiph = &tunnel->parms.iph;
708 }
709
710 if ((dst = tiph->daddr) == 0) {
711 /* NBMA tunnel */
712
713 if (skb->dst == NULL) {
714 tunnel->stat.tx_fifo_errors++;
715 goto tx_error;
716 }
717
718 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800719 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 if ((dst = rt->rt_gateway) == 0)
721 goto tx_error_icmp;
722 }
723#ifdef CONFIG_IPV6
724 else if (skb->protocol == htons(ETH_P_IPV6)) {
725 struct in6_addr *addr6;
726 int addr_type;
727 struct neighbour *neigh = skb->dst->neighbour;
728
729 if (neigh == NULL)
730 goto tx_error;
731
732 addr6 = (struct in6_addr*)&neigh->primary_key;
733 addr_type = ipv6_addr_type(addr6);
734
735 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700736 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 addr_type = ipv6_addr_type(addr6);
738 }
739
740 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
741 goto tx_error_icmp;
742
743 dst = addr6->s6_addr32[3];
744 }
745#endif
746 else
747 goto tx_error;
748 }
749
750 tos = tiph->tos;
751 if (tos&1) {
752 if (skb->protocol == htons(ETH_P_IP))
753 tos = old_iph->tos;
754 tos &= ~1;
755 }
756
757 {
758 struct flowi fl = { .oif = tunnel->parms.link,
759 .nl_u = { .ip4_u =
760 { .daddr = dst,
761 .saddr = tiph->saddr,
762 .tos = RT_TOS(tos) } },
763 .proto = IPPROTO_GRE };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800764 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 tunnel->stat.tx_carrier_errors++;
766 goto tx_error;
767 }
768 }
769 tdev = rt->u.dst.dev;
770
771 if (tdev == dev) {
772 ip_rt_put(rt);
773 tunnel->stat.collisions++;
774 goto tx_error;
775 }
776
777 df = tiph->frag_off;
778 if (df)
779 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
780 else
781 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
782
783 if (skb->dst)
784 skb->dst->ops->update_pmtu(skb->dst, mtu);
785
786 if (skb->protocol == htons(ETH_P_IP)) {
787 df |= (old_iph->frag_off&htons(IP_DF));
788
789 if ((old_iph->frag_off&htons(IP_DF)) &&
790 mtu < ntohs(old_iph->tot_len)) {
791 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
792 ip_rt_put(rt);
793 goto tx_error;
794 }
795 }
796#ifdef CONFIG_IPV6
797 else if (skb->protocol == htons(ETH_P_IPV6)) {
798 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
799
800 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800801 if ((tunnel->parms.iph.daddr &&
802 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 rt6->rt6i_dst.plen == 128) {
804 rt6->rt6i_flags |= RTF_MODIFIED;
805 skb->dst->metrics[RTAX_MTU-1] = mtu;
806 }
807 }
808
809 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
810 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
811 ip_rt_put(rt);
812 goto tx_error;
813 }
814 }
815#endif
816
817 if (tunnel->err_count > 0) {
818 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
819 tunnel->err_count--;
820
821 dst_link_failure(skb);
822 } else
823 tunnel->err_count = 0;
824 }
825
826 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
827
Patrick McHardycfbba492007-07-09 15:33:40 -0700828 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
829 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
831 if (!new_skb) {
832 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900833 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 dev_kfree_skb(skb);
835 tunnel->recursion--;
836 return 0;
837 }
838 if (skb->sk)
839 skb_set_owner_w(new_skb, skb->sk);
840 dev_kfree_skb(skb);
841 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700842 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 }
844
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700845 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700846 skb_push(skb, gre_hlen);
847 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800849 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
850 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 dst_release(skb->dst);
852 skb->dst = &rt->u.dst;
853
854 /*
855 * Push down and install the IPIP header.
856 */
857
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700858 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 iph->version = 4;
860 iph->ihl = sizeof(struct iphdr) >> 2;
861 iph->frag_off = df;
862 iph->protocol = IPPROTO_GRE;
863 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
864 iph->daddr = rt->rt_dst;
865 iph->saddr = rt->rt_src;
866
867 if ((iph->ttl = tiph->ttl) == 0) {
868 if (skb->protocol == htons(ETH_P_IP))
869 iph->ttl = old_iph->ttl;
870#ifdef CONFIG_IPV6
871 else if (skb->protocol == htons(ETH_P_IPV6))
872 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
873#endif
874 else
875 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
876 }
877
Al Virod5a0a1e2006-11-08 00:23:14 -0800878 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
879 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
881 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800882 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 if (tunnel->parms.o_flags&GRE_SEQ) {
885 ++tunnel->o_seqno;
886 *ptr = htonl(tunnel->o_seqno);
887 ptr--;
888 }
889 if (tunnel->parms.o_flags&GRE_KEY) {
890 *ptr = tunnel->parms.o_key;
891 ptr--;
892 }
893 if (tunnel->parms.o_flags&GRE_CSUM) {
894 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800895 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 }
897 }
898
899 nf_reset(skb);
900
901 IPTUNNEL_XMIT();
902 tunnel->recursion--;
903 return 0;
904
905tx_error_icmp:
906 dst_link_failure(skb);
907
908tx_error:
909 stats->tx_errors++;
910 dev_kfree_skb(skb);
911 tunnel->recursion--;
912 return 0;
913}
914
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800915static void ipgre_tunnel_bind_dev(struct net_device *dev)
916{
917 struct net_device *tdev = NULL;
918 struct ip_tunnel *tunnel;
919 struct iphdr *iph;
920 int hlen = LL_MAX_HEADER;
921 int mtu = ETH_DATA_LEN;
922 int addend = sizeof(struct iphdr) + 4;
923
924 tunnel = netdev_priv(dev);
925 iph = &tunnel->parms.iph;
926
927 /* Guess output device to choose reasonable mtu and hard_header_len */
928
929 if (iph->daddr) {
930 struct flowi fl = { .oif = tunnel->parms.link,
931 .nl_u = { .ip4_u =
932 { .daddr = iph->daddr,
933 .saddr = iph->saddr,
934 .tos = RT_TOS(iph->tos) } },
935 .proto = IPPROTO_GRE };
936 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800937 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800938 tdev = rt->u.dst.dev;
939 ip_rt_put(rt);
940 }
941 dev->flags |= IFF_POINTOPOINT;
942 }
943
944 if (!tdev && tunnel->parms.link)
945 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
946
947 if (tdev) {
948 hlen = tdev->hard_header_len;
949 mtu = tdev->mtu;
950 }
951 dev->iflink = tunnel->parms.link;
952
953 /* Precalculate GRE options length */
954 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
955 if (tunnel->parms.o_flags&GRE_CSUM)
956 addend += 4;
957 if (tunnel->parms.o_flags&GRE_KEY)
958 addend += 4;
959 if (tunnel->parms.o_flags&GRE_SEQ)
960 addend += 4;
961 }
962 dev->hard_header_len = hlen + addend;
963 dev->mtu = mtu - addend;
964 tunnel->hlen = addend;
965
966}
967
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968static int
969ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
970{
971 int err = 0;
972 struct ip_tunnel_parm p;
973 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700974 struct net *net = dev_net(dev);
975 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977 switch (cmd) {
978 case SIOCGETTUNNEL:
979 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700980 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
982 err = -EFAULT;
983 break;
984 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700985 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
987 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800988 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 memcpy(&p, &t->parms, sizeof(p));
990 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
991 err = -EFAULT;
992 break;
993
994 case SIOCADDTUNNEL:
995 case SIOCCHGTUNNEL:
996 err = -EPERM;
997 if (!capable(CAP_NET_ADMIN))
998 goto done;
999
1000 err = -EFAULT;
1001 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1002 goto done;
1003
1004 err = -EINVAL;
1005 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1006 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1007 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1008 goto done;
1009 if (p.iph.ttl)
1010 p.iph.frag_off |= htons(IP_DF);
1011
1012 if (!(p.i_flags&GRE_KEY))
1013 p.i_key = 0;
1014 if (!(p.o_flags&GRE_KEY))
1015 p.o_key = 0;
1016
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001017 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001019 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 if (t != NULL) {
1021 if (t->dev != dev) {
1022 err = -EEXIST;
1023 break;
1024 }
1025 } else {
1026 unsigned nflags=0;
1027
Patrick McHardy2941a482006-01-08 22:05:26 -08001028 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
Joe Perchesf97c1e02007-12-16 13:45:43 -08001030 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 nflags = IFF_BROADCAST;
1032 else if (p.iph.daddr)
1033 nflags = IFF_POINTOPOINT;
1034
1035 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1036 err = -EINVAL;
1037 break;
1038 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001039 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 t->parms.iph.saddr = p.iph.saddr;
1041 t->parms.iph.daddr = p.iph.daddr;
1042 t->parms.i_key = p.i_key;
1043 t->parms.o_key = p.o_key;
1044 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1045 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001046 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 netdev_state_change(dev);
1048 }
1049 }
1050
1051 if (t) {
1052 err = 0;
1053 if (cmd == SIOCCHGTUNNEL) {
1054 t->parms.iph.ttl = p.iph.ttl;
1055 t->parms.iph.tos = p.iph.tos;
1056 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001057 if (t->parms.link != p.link) {
1058 t->parms.link = p.link;
1059 ipgre_tunnel_bind_dev(dev);
1060 netdev_state_change(dev);
1061 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 }
1063 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1064 err = -EFAULT;
1065 } else
1066 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1067 break;
1068
1069 case SIOCDELTUNNEL:
1070 err = -EPERM;
1071 if (!capable(CAP_NET_ADMIN))
1072 goto done;
1073
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001074 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 err = -EFAULT;
1076 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1077 goto done;
1078 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001079 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 goto done;
1081 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001082 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 goto done;
1084 dev = t->dev;
1085 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001086 unregister_netdevice(dev);
1087 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 break;
1089
1090 default:
1091 err = -EINVAL;
1092 }
1093
1094done:
1095 return err;
1096}
1097
1098static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1099{
Patrick McHardy2941a482006-01-08 22:05:26 -08001100 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101}
1102
1103static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1104{
Patrick McHardy2941a482006-01-08 22:05:26 -08001105 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1107 return -EINVAL;
1108 dev->mtu = new_mtu;
1109 return 0;
1110}
1111
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1140
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001141static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1142 unsigned short type,
1143 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144{
Patrick McHardy2941a482006-01-08 22:05:26 -08001145 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001147 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148
1149 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1150 p[0] = t->parms.o_flags;
1151 p[1] = htons(type);
1152
1153 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001154 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001156
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 if (saddr)
1158 memcpy(&iph->saddr, saddr, 4);
1159
1160 if (daddr) {
1161 memcpy(&iph->daddr, daddr, 4);
1162 return t->hlen;
1163 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001164 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001166
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 return -t->hlen;
1168}
1169
Timo Teras6a5f44d2007-10-23 20:31:53 -07001170static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1171{
1172 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1173 memcpy(haddr, &iph->saddr, 4);
1174 return 4;
1175}
1176
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001177static const struct header_ops ipgre_header_ops = {
1178 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001179 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001180};
1181
Timo Teras6a5f44d2007-10-23 20:31:53 -07001182#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183static int ipgre_open(struct net_device *dev)
1184{
Patrick McHardy2941a482006-01-08 22:05:26 -08001185 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186
Joe Perchesf97c1e02007-12-16 13:45:43 -08001187 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 struct flowi fl = { .oif = t->parms.link,
1189 .nl_u = { .ip4_u =
1190 { .daddr = t->parms.iph.daddr,
1191 .saddr = t->parms.iph.saddr,
1192 .tos = RT_TOS(t->parms.iph.tos) } },
1193 .proto = IPPROTO_GRE };
1194 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -08001195 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 return -EADDRNOTAVAIL;
1197 dev = rt->u.dst.dev;
1198 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001199 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 return -EADDRNOTAVAIL;
1201 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001202 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 }
1204 return 0;
1205}
1206
1207static int ipgre_close(struct net_device *dev)
1208{
Patrick McHardy2941a482006-01-08 22:05:26 -08001209 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001210 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001211 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001212 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 if (in_dev) {
1214 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1215 in_dev_put(in_dev);
1216 }
1217 }
1218 return 0;
1219}
1220
1221#endif
1222
1223static void ipgre_tunnel_setup(struct net_device *dev)
1224{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 dev->uninit = ipgre_tunnel_uninit;
1226 dev->destructor = free_netdev;
1227 dev->hard_start_xmit = ipgre_tunnel_xmit;
1228 dev->get_stats = ipgre_tunnel_get_stats;
1229 dev->do_ioctl = ipgre_tunnel_ioctl;
1230 dev->change_mtu = ipgre_tunnel_change_mtu;
1231
1232 dev->type = ARPHRD_IPGRE;
1233 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001234 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 dev->flags = IFF_NOARP;
1236 dev->iflink = 0;
1237 dev->addr_len = 4;
1238}
1239
1240static int ipgre_tunnel_init(struct net_device *dev)
1241{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 struct ip_tunnel *tunnel;
1243 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244
Patrick McHardy2941a482006-01-08 22:05:26 -08001245 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 iph = &tunnel->parms.iph;
1247
1248 tunnel->dev = dev;
1249 strcpy(tunnel->parms.name, dev->name);
1250
1251 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1252 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1253
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001254 ipgre_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
1256 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001258 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 if (!iph->saddr)
1260 return -EINVAL;
1261 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001262 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263 dev->open = ipgre_open;
1264 dev->stop = ipgre_close;
1265 }
1266#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001267 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001268 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 return 0;
1271}
1272
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001273static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274{
Patrick McHardy2941a482006-01-08 22:05:26 -08001275 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276 struct iphdr *iph = &tunnel->parms.iph;
1277
1278 tunnel->dev = dev;
1279 strcpy(tunnel->parms.name, dev->name);
1280
1281 iph->version = 4;
1282 iph->protocol = IPPROTO_GRE;
1283 iph->ihl = 5;
1284 tunnel->hlen = sizeof(struct iphdr) + 4;
1285
1286 dev_hold(dev);
1287 tunnels_wc[0] = tunnel;
1288 return 0;
1289}
1290
1291
1292static struct net_protocol ipgre_protocol = {
1293 .handler = ipgre_rcv,
1294 .err_handler = ipgre_err,
1295};
1296
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001297static int ipgre_init_net(struct net *net)
1298{
1299 int err;
1300 struct ipgre_net *ign;
1301
1302 err = -ENOMEM;
1303 ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1304 if (ign == NULL)
1305 goto err_alloc;
1306
1307 err = net_assign_generic(net, ipgre_net_id, ign);
1308 if (err < 0)
1309 goto err_assign;
1310
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001311 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1312 ipgre_tunnel_setup);
1313 if (!ign->fb_tunnel_dev) {
1314 err = -ENOMEM;
1315 goto err_alloc_dev;
1316 }
1317
1318 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1319 dev_net_set(ign->fb_tunnel_dev, net);
1320
1321 if ((err = register_netdev(ign->fb_tunnel_dev)))
1322 goto err_reg_dev;
1323
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001324 return 0;
1325
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001326err_reg_dev:
1327 free_netdev(ign->fb_tunnel_dev);
1328err_alloc_dev:
1329 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001330err_assign:
1331 kfree(ign);
1332err_alloc:
1333 return err;
1334}
1335
1336static void ipgre_exit_net(struct net *net)
1337{
1338 struct ipgre_net *ign;
1339
1340 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001341 rtnl_lock();
1342 if (net != &init_net)
1343 unregister_netdevice(ign->fb_tunnel_dev);
1344 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001345 kfree(ign);
1346}
1347
1348static struct pernet_operations ipgre_net_ops = {
1349 .init = ipgre_init_net,
1350 .exit = ipgre_exit_net,
1351};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352
1353/*
1354 * And now the modules code and kernel interface.
1355 */
1356
1357static int __init ipgre_init(void)
1358{
1359 int err;
1360
1361 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1362
1363 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1364 printk(KERN_INFO "ipgre init: can't add protocol\n");
1365 return -EAGAIN;
1366 }
1367
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001368 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1369 if (err < 0)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001370 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1371
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373}
1374
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001375static void __exit ipgre_destroy_tunnels(void)
1376{
1377 int prio;
1378
1379 for (prio = 0; prio < 4; prio++) {
1380 int h;
1381 for (h = 0; h < HASH_SIZE; h++) {
1382 struct ip_tunnel *t;
1383 while ((t = tunnels[prio][h]) != NULL)
1384 unregister_netdevice(t->dev);
1385 }
1386 }
1387}
1388
1389static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390{
1391 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1392 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1393
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001394 rtnl_lock();
1395 ipgre_destroy_tunnels();
1396 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001397
1398 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399}
1400
1401module_init(ipgre_init);
1402module_exit(ipgre_fini);
1403MODULE_LICENSE("GPL");