blob: 906cb1ada4c35db7ef7281c7dd69907854b68169 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
42
43#ifdef CONFIG_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_fib.h>
46#include <net/ip6_route.h>
47#endif
48
49/*
50 Problems & solutions
51 --------------------
52
53 1. The most important issue is detecting local dead loops.
54 They would cause complete host lockup in transmit, which
55 would be "resolved" by stack overflow or, if queueing is enabled,
56 with infinite looping in net_bh.
57
58 We cannot track such dead loops during route installation,
59 it is infeasible task. The most general solutions would be
60 to keep skb->encapsulation counter (sort of local ttl),
61 and silently drop packet when it expires. It is the best
62 solution, but it supposes maintaining a new variable in ALL
63 skb, even if no tunneling is used.
64
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090065 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070066 like dev->tbusy flag, but I preferred new variable, because
67 the semantics is different. One day, when hard_start_xmit
68 will be multithreaded we will have to use skb->encapsulation.
69
70
71
72 2. Networking dead loops would not kill routers, but would really
73 kill network. IP hop limit plays role of "t->recursion" in this case,
74 if we copy it from packet being encapsulated to upper header.
75 It is very good solution, but it introduces two problems:
76
77 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
78 do not work over tunnels.
79 - traceroute does not work. I planned to relay ICMP from tunnel,
80 so that this problem would be solved and traceroute output
81 would be even more informative. This idea appeared to be wrong:
82 only Linux complies to rfc1812 now (yes, guys, Linux is the only
83 true router now :-)), all routers (at least, in neighbourhood of mine)
84 return only 8 bytes of payload. It is the end.
85
86 Hence, if we want that OSPF worked or traceroute said something reasonable,
87 we should search for another solution.
88
89 One of them is to parse packet trying to detect inner encapsulation
90 made by our node. It is difficult or even impossible, especially,
91 taking into account fragmentation. To be short, it is not a solution at all.
92
93 Current solution: The solution was UNEXPECTEDLY SIMPLE.
94 We force DF flag on tunnels with preconfigured hop limit,
95 that is ALL. :-) Well, it does not remove the problem completely,
96 but exponential growth of network traffic is changed to linear
97 (branches, that exceed pmtu are pruned) and tunnel mtu
98 fastly degrades to value <68, where looping stops.
99 Yes, it is not good if there exists a router in the loop,
100 which does not force DF, even when encapsulating packets have DF set.
101 But it is not our problem! Nobody could accuse us, we made
102 all that we could make. Even if it is your gated who injected
103 fatal route to network, even if it were you who configured
104 fatal static route: you are innocent. :-)
105
106
107
108 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
109 practically identical code. It would be good to glue them
110 together, but it is not very evident, how to make them modular.
111 sit is integral part of IPv6, ipip and gre are naturally modular.
112 We could extract common parts (hash table, ioctl etc)
113 to a separate module (ip_tunnel.c).
114
115 Alexey Kuznetsov.
116 */
117
/*
 * Forward declarations: ipgre_tunnel_setup() is handed to alloc_netdev() and
 * ipgre_tunnel_init() is installed as dev->init when a tunnel is created.
 */
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_init(struct net_device *dev);

/*
 * Fallback tunnel: no source, no destination, no key, no options.
 * Input lookup returns it (when it is up) if no configured tunnel matches.
 */
static struct net_device *ipgre_fb_tunnel_dev;
static int ipgre_fb_tunnel_init(struct net_device *dev);
126
127/* Tunnel hash table */
128
129/*
130 4 hash tables:
131
132 3: (remote,local)
133 2: (remote,*)
134 1: (*,local)
135 0: (*,*)
136
137 We require exact key match i.e. if a key is present in packet
138 it will match only tunnel with the same key; if it is not present,
139 it will match only keyless tunnel.
140
141 All keyless packets, if not matched by configured keyless tunnels,
142 will match fallback tunnel.
143 */
144
/* Number of buckets in each of the four tunnel hash tables (power of two). */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that a compound expression is cast
 * as a whole; note that it is still evaluated twice, so it must be free of
 * side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/*
 * tunnels[prio][hash]: prio bit 1 is set when the tunnel has a unicast
 * remote address, prio bit 0 when it has a local address.
 */
static struct ip_tunnel *tunnels[4][HASH_SIZE];

#define tunnels_r_l	(tunnels[3])	/* (remote, local) */
#define tunnels_r	(tunnels[2])	/* (remote, *)     */
#define tunnels_l	(tunnels[1])	/* (*, local)      */
#define tunnels_wc	(tunnels[0])	/* (*, *)          */

/*
 * Taken for writing while a hash chain pointer is updated and for reading
 * around lookups on the receive and ICMP error paths.
 */
static DEFINE_RWLOCK(ipgre_lock);
156
157/* Given src, dst and key, find appropriate for input tunnel. */
158
Al Virod5a0a1e2006-11-08 00:23:14 -0800159static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160{
161 unsigned h0 = HASH(remote);
162 unsigned h1 = HASH(key);
163 struct ip_tunnel *t;
164
165 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
166 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
167 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
168 return t;
169 }
170 }
171 for (t = tunnels_r[h0^h1]; t; t = t->next) {
172 if (remote == t->parms.iph.daddr) {
173 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174 return t;
175 }
176 }
177 for (t = tunnels_l[h1]; t; t = t->next) {
178 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800179 (local == t->parms.iph.daddr &&
180 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182 return t;
183 }
184 }
185 for (t = tunnels_wc[h1]; t; t = t->next) {
186 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
187 return t;
188 }
189
190 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
Patrick McHardy2941a482006-01-08 22:05:26 -0800191 return netdev_priv(ipgre_fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 return NULL;
193}
194
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900195static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900197 __be32 remote = parms->iph.daddr;
198 __be32 local = parms->iph.saddr;
199 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 unsigned h = HASH(key);
201 int prio = 0;
202
203 if (local)
204 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800205 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 prio |= 2;
207 h ^= HASH(remote);
208 }
209
210 return &tunnels[prio][h];
211}
212
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900213static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
214{
215 return __ipgre_bucket(&t->parms);
216}
217
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218static void ipgre_tunnel_link(struct ip_tunnel *t)
219{
220 struct ip_tunnel **tp = ipgre_bucket(t);
221
222 t->next = *tp;
223 write_lock_bh(&ipgre_lock);
224 *tp = t;
225 write_unlock_bh(&ipgre_lock);
226}
227
228static void ipgre_tunnel_unlink(struct ip_tunnel *t)
229{
230 struct ip_tunnel **tp;
231
232 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
233 if (t == *tp) {
234 write_lock_bh(&ipgre_lock);
235 *tp = t->next;
236 write_unlock_bh(&ipgre_lock);
237 break;
238 }
239 }
240}
241
242static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
243{
Al Virod5a0a1e2006-11-08 00:23:14 -0800244 __be32 remote = parms->iph.daddr;
245 __be32 local = parms->iph.saddr;
246 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 struct ip_tunnel *t, **tp, *nt;
248 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 char name[IFNAMSIZ];
250
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900251 for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
253 if (key == t->parms.i_key)
254 return t;
255 }
256 }
257 if (!create)
258 return NULL;
259
260 if (parms->name[0])
261 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800262 else
263 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264
265 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
266 if (!dev)
267 return NULL;
268
269 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800270 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 nt->parms = *parms;
272
273 if (register_netdevice(dev) < 0) {
274 free_netdev(dev);
275 goto failed;
276 }
277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 dev_hold(dev);
279 ipgre_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 return nt;
281
282failed:
283 return NULL;
284}
285
/* Take the tunnel out of the hash table and drop one device reference. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	ipgre_tunnel_unlink(tunnel);
	dev_put(dev);
}
291
292
293static void ipgre_err(struct sk_buff *skb, u32 info)
294{
295#ifndef I_WISH_WORLD_WERE_PERFECT
296
297/* It is not :-( All the routers (except for Linux) return only
298 8 bytes of packet payload. It means, that precise relaying of
299 ICMP in the real Internet is absolutely infeasible.
300
301 Moreover, Cisco "wise men" put GRE key to the third word
302 in GRE header. It makes impossible maintaining even soft state for keyed
303 GRE tunnels with enabled checksum. Tell them "thank you".
304
305 Well, I wonder, rfc1812 was written by Cisco employee,
306 what the hell these idiots break standrads established
307 by themself???
308 */
309
310 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800311 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300313 const int type = icmp_hdr(skb)->type;
314 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800316 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317
318 flags = p[0];
319 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
320 if (flags&(GRE_VERSION|GRE_ROUTING))
321 return;
322 if (flags&GRE_KEY) {
323 grehlen += 4;
324 if (flags&GRE_CSUM)
325 grehlen += 4;
326 }
327 }
328
329 /* If only 8 bytes returned, keyed message will be dropped here */
330 if (skb_headlen(skb) < grehlen)
331 return;
332
333 switch (type) {
334 default:
335 case ICMP_PARAMETERPROB:
336 return;
337
338 case ICMP_DEST_UNREACH:
339 switch (code) {
340 case ICMP_SR_FAILED:
341 case ICMP_PORT_UNREACH:
342 /* Impossible event. */
343 return;
344 case ICMP_FRAG_NEEDED:
345 /* Soft state for pmtu is maintained by IP core. */
346 return;
347 default:
348 /* All others are translated to HOST_UNREACH.
349 rfc2003 contains "deep thoughts" about NET_UNREACH,
350 I believe they are just ether pollution. --ANK
351 */
352 break;
353 }
354 break;
355 case ICMP_TIME_EXCEEDED:
356 if (code != ICMP_EXC_TTL)
357 return;
358 break;
359 }
360
361 read_lock(&ipgre_lock);
Al Virod5a0a1e2006-11-08 00:23:14 -0800362 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800363 if (t == NULL || t->parms.iph.daddr == 0 ||
364 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 goto out;
366
367 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
368 goto out;
369
370 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
371 t->err_count++;
372 else
373 t->err_count = 1;
374 t->err_time = jiffies;
375out:
376 read_unlock(&ipgre_lock);
377 return;
378#else
379 struct iphdr *iph = (struct iphdr*)dp;
380 struct iphdr *eiph;
Al Virod5a0a1e2006-11-08 00:23:14 -0800381 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300382 const int type = icmp_hdr(skb)->type;
383 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 int rel_type = 0;
385 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700386 __be32 rel_info = 0;
387 __u32 n = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800388 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 int grehlen = (iph->ihl<<2) + 4;
390 struct sk_buff *skb2;
391 struct flowi fl;
392 struct rtable *rt;
393
394 if (p[1] != htons(ETH_P_IP))
395 return;
396
397 flags = p[0];
398 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
399 if (flags&(GRE_VERSION|GRE_ROUTING))
400 return;
401 if (flags&GRE_CSUM)
402 grehlen += 4;
403 if (flags&GRE_KEY)
404 grehlen += 4;
405 if (flags&GRE_SEQ)
406 grehlen += 4;
407 }
408 if (len < grehlen + sizeof(struct iphdr))
409 return;
410 eiph = (struct iphdr*)(dp + grehlen);
411
412 switch (type) {
413 default:
414 return;
415 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300416 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700417 if (n < (iph->ihl<<2))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 return;
419
420 /* So... This guy found something strange INSIDE encapsulated
421 packet. Well, he is fool, but what can we do ?
422 */
423 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700424 n -= grehlen;
425 rel_info = htonl(n << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 break;
427
428 case ICMP_DEST_UNREACH:
429 switch (code) {
430 case ICMP_SR_FAILED:
431 case ICMP_PORT_UNREACH:
432 /* Impossible event. */
433 return;
434 case ICMP_FRAG_NEEDED:
435 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300436 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700437 if (n < grehlen+68)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700439 n -= grehlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700441 if (n > ntohs(eiph->tot_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700443 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 break;
445 default:
446 /* All others are translated to HOST_UNREACH.
447 rfc2003 contains "deep thoughts" about NET_UNREACH,
448 I believe, it is just ether pollution. --ANK
449 */
450 rel_type = ICMP_DEST_UNREACH;
451 rel_code = ICMP_HOST_UNREACH;
452 break;
453 }
454 break;
455 case ICMP_TIME_EXCEEDED:
456 if (code != ICMP_EXC_TTL)
457 return;
458 break;
459 }
460
461 /* Prepare fake skb to feed it to icmp_send */
462 skb2 = skb_clone(skb, GFP_ATOMIC);
463 if (skb2 == NULL)
464 return;
465 dst_release(skb2->dst);
466 skb2->dst = NULL;
467 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700468 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469
470 /* Try to guess incoming interface */
471 memset(&fl, 0, sizeof(fl));
472 fl.fl4_dst = eiph->saddr;
473 fl.fl4_tos = RT_TOS(eiph->tos);
474 fl.proto = IPPROTO_GRE;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800475 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 kfree_skb(skb2);
477 return;
478 }
479 skb2->dev = rt->u.dst.dev;
480
481 /* route "incoming" packet */
482 if (rt->rt_flags&RTCF_LOCAL) {
483 ip_rt_put(rt);
484 rt = NULL;
485 fl.fl4_dst = eiph->daddr;
486 fl.fl4_src = eiph->saddr;
487 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800488 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 rt->u.dst.dev->type != ARPHRD_IPGRE) {
490 ip_rt_put(rt);
491 kfree_skb(skb2);
492 return;
493 }
494 } else {
495 ip_rt_put(rt);
496 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
497 skb2->dst->dev->type != ARPHRD_IPGRE) {
498 kfree_skb(skb2);
499 return;
500 }
501 }
502
503 /* change mtu on this route */
504 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700505 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 kfree_skb(skb2);
507 return;
508 }
Al Viroc55e2f42006-09-19 13:23:19 -0700509 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800511 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 if (t->parms.iph.ttl) {
513 rel_type = ICMP_DEST_UNREACH;
514 rel_code = ICMP_HOST_UNREACH;
515 }
516 }
517
518 icmp_send(skb2, rel_type, rel_code, rel_info);
519 kfree_skb(skb2);
520#endif
521}
522
523static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
524{
525 if (INET_ECN_is_ce(iph->tos)) {
526 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700527 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700529 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 }
531 }
532}
533
534static inline u8
535ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
536{
537 u8 inner = 0;
538 if (skb->protocol == htons(ETH_P_IP))
539 inner = old_iph->tos;
540 else if (skb->protocol == htons(ETH_P_IPV6))
541 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
542 return INET_ECN_encapsulate(tos, inner);
543}
544
545static int ipgre_rcv(struct sk_buff *skb)
546{
547 struct iphdr *iph;
548 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800549 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800550 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800551 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 u32 seqno = 0;
553 struct ip_tunnel *tunnel;
554 int offset = 4;
555
556 if (!pskb_may_pull(skb, 16))
557 goto drop_nolock;
558
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700559 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800561 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
563 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
564 /* - Version must be 0.
565 - We do not support routing headers.
566 */
567 if (flags&(GRE_VERSION|GRE_ROUTING))
568 goto drop_nolock;
569
570 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800571 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700572 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800573 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800574 if (!csum)
575 break;
576 /* fall through */
577 case CHECKSUM_NONE:
578 skb->csum = 0;
579 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700580 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 }
582 offset += 4;
583 }
584 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800585 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 offset += 4;
587 }
588 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800589 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 offset += 4;
591 }
592 }
593
594 read_lock(&ipgre_lock);
595 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
596 secpath_reset(skb);
597
Al Virod5a0a1e2006-11-08 00:23:14 -0800598 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 /* WCCP version 1 and 2 protocol decoding.
600 * - Change protocol to IP
601 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
602 */
603 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700604 skb->protocol == htons(ETH_P_WCCP)) {
605 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900606 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 offset += 4;
608 }
609
Timo Teras1d069162007-12-20 00:10:33 -0800610 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300611 __pskb_pull(skb, offset);
612 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700613 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 skb->pkt_type = PACKET_HOST;
615#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800616 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 /* Looped back packet, drop it! */
618 if (((struct rtable*)skb->dst)->fl.iif == 0)
619 goto drop;
620 tunnel->stat.multicast++;
621 skb->pkt_type = PACKET_BROADCAST;
622 }
623#endif
624
625 if (((flags&GRE_CSUM) && csum) ||
626 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
627 tunnel->stat.rx_crc_errors++;
628 tunnel->stat.rx_errors++;
629 goto drop;
630 }
631 if (tunnel->parms.i_flags&GRE_SEQ) {
632 if (!(flags&GRE_SEQ) ||
633 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
634 tunnel->stat.rx_fifo_errors++;
635 tunnel->stat.rx_errors++;
636 goto drop;
637 }
638 tunnel->i_seqno = seqno + 1;
639 }
640 tunnel->stat.rx_packets++;
641 tunnel->stat.rx_bytes += skb->len;
642 skb->dev = tunnel->dev;
643 dst_release(skb->dst);
644 skb->dst = NULL;
645 nf_reset(skb);
646 ipgre_ecn_decapsulate(iph, skb);
647 netif_rx(skb);
648 read_unlock(&ipgre_lock);
649 return(0);
650 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700651 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653drop:
654 read_unlock(&ipgre_lock);
655drop_nolock:
656 kfree_skb(skb);
657 return(0);
658}
659
660static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
661{
Patrick McHardy2941a482006-01-08 22:05:26 -0800662 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 struct net_device_stats *stats = &tunnel->stat;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700664 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 struct iphdr *tiph;
666 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800667 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 struct rtable *rt; /* Route to the other host */
669 struct net_device *tdev; /* Device to other host */
670 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700671 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800673 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int mtu;
675
676 if (tunnel->recursion++) {
677 tunnel->stat.collisions++;
678 goto tx_error;
679 }
680
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700681 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 gre_hlen = 0;
683 tiph = (struct iphdr*)skb->data;
684 } else {
685 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph;
687 }
688
689 if ((dst = tiph->daddr) == 0) {
690 /* NBMA tunnel */
691
692 if (skb->dst == NULL) {
693 tunnel->stat.tx_fifo_errors++;
694 goto tx_error;
695 }
696
697 if (skb->protocol == htons(ETH_P_IP)) {
698 rt = (struct rtable*)skb->dst;
699 if ((dst = rt->rt_gateway) == 0)
700 goto tx_error_icmp;
701 }
702#ifdef CONFIG_IPV6
703 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6;
705 int addr_type;
706 struct neighbour *neigh = skb->dst->neighbour;
707
708 if (neigh == NULL)
709 goto tx_error;
710
711 addr6 = (struct in6_addr*)&neigh->primary_key;
712 addr_type = ipv6_addr_type(addr6);
713
714 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700715 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 addr_type = ipv6_addr_type(addr6);
717 }
718
719 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720 goto tx_error_icmp;
721
722 dst = addr6->s6_addr32[3];
723 }
724#endif
725 else
726 goto tx_error;
727 }
728
729 tos = tiph->tos;
730 if (tos&1) {
731 if (skb->protocol == htons(ETH_P_IP))
732 tos = old_iph->tos;
733 tos &= ~1;
734 }
735
736 {
737 struct flowi fl = { .oif = tunnel->parms.link,
738 .nl_u = { .ip4_u =
739 { .daddr = dst,
740 .saddr = tiph->saddr,
741 .tos = RT_TOS(tos) } },
742 .proto = IPPROTO_GRE };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800743 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 tunnel->stat.tx_carrier_errors++;
745 goto tx_error;
746 }
747 }
748 tdev = rt->u.dst.dev;
749
750 if (tdev == dev) {
751 ip_rt_put(rt);
752 tunnel->stat.collisions++;
753 goto tx_error;
754 }
755
756 df = tiph->frag_off;
757 if (df)
758 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
759 else
760 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
761
762 if (skb->dst)
763 skb->dst->ops->update_pmtu(skb->dst, mtu);
764
765 if (skb->protocol == htons(ETH_P_IP)) {
766 df |= (old_iph->frag_off&htons(IP_DF));
767
768 if ((old_iph->frag_off&htons(IP_DF)) &&
769 mtu < ntohs(old_iph->tot_len)) {
770 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
771 ip_rt_put(rt);
772 goto tx_error;
773 }
774 }
775#ifdef CONFIG_IPV6
776 else if (skb->protocol == htons(ETH_P_IPV6)) {
777 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
778
779 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800780 if ((tunnel->parms.iph.daddr &&
781 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 rt6->rt6i_dst.plen == 128) {
783 rt6->rt6i_flags |= RTF_MODIFIED;
784 skb->dst->metrics[RTAX_MTU-1] = mtu;
785 }
786 }
787
788 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
789 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
790 ip_rt_put(rt);
791 goto tx_error;
792 }
793 }
794#endif
795
796 if (tunnel->err_count > 0) {
797 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
798 tunnel->err_count--;
799
800 dst_link_failure(skb);
801 } else
802 tunnel->err_count = 0;
803 }
804
805 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
806
Patrick McHardycfbba492007-07-09 15:33:40 -0700807 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
808 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
810 if (!new_skb) {
811 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900812 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 dev_kfree_skb(skb);
814 tunnel->recursion--;
815 return 0;
816 }
817 if (skb->sk)
818 skb_set_owner_w(new_skb, skb->sk);
819 dev_kfree_skb(skb);
820 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700821 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 }
823
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700824 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700825 skb_push(skb, gre_hlen);
826 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800828 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
829 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 dst_release(skb->dst);
831 skb->dst = &rt->u.dst;
832
833 /*
834 * Push down and install the IPIP header.
835 */
836
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700837 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 iph->version = 4;
839 iph->ihl = sizeof(struct iphdr) >> 2;
840 iph->frag_off = df;
841 iph->protocol = IPPROTO_GRE;
842 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
843 iph->daddr = rt->rt_dst;
844 iph->saddr = rt->rt_src;
845
846 if ((iph->ttl = tiph->ttl) == 0) {
847 if (skb->protocol == htons(ETH_P_IP))
848 iph->ttl = old_iph->ttl;
849#ifdef CONFIG_IPV6
850 else if (skb->protocol == htons(ETH_P_IPV6))
851 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
852#endif
853 else
854 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
855 }
856
Al Virod5a0a1e2006-11-08 00:23:14 -0800857 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
858 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
860 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800861 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862
863 if (tunnel->parms.o_flags&GRE_SEQ) {
864 ++tunnel->o_seqno;
865 *ptr = htonl(tunnel->o_seqno);
866 ptr--;
867 }
868 if (tunnel->parms.o_flags&GRE_KEY) {
869 *ptr = tunnel->parms.o_key;
870 ptr--;
871 }
872 if (tunnel->parms.o_flags&GRE_CSUM) {
873 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800874 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 }
876 }
877
878 nf_reset(skb);
879
880 IPTUNNEL_XMIT();
881 tunnel->recursion--;
882 return 0;
883
884tx_error_icmp:
885 dst_link_failure(skb);
886
887tx_error:
888 stats->tx_errors++;
889 dev_kfree_skb(skb);
890 tunnel->recursion--;
891 return 0;
892}
893
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800894static void ipgre_tunnel_bind_dev(struct net_device *dev)
895{
896 struct net_device *tdev = NULL;
897 struct ip_tunnel *tunnel;
898 struct iphdr *iph;
899 int hlen = LL_MAX_HEADER;
900 int mtu = ETH_DATA_LEN;
901 int addend = sizeof(struct iphdr) + 4;
902
903 tunnel = netdev_priv(dev);
904 iph = &tunnel->parms.iph;
905
906 /* Guess output device to choose reasonable mtu and hard_header_len */
907
908 if (iph->daddr) {
909 struct flowi fl = { .oif = tunnel->parms.link,
910 .nl_u = { .ip4_u =
911 { .daddr = iph->daddr,
912 .saddr = iph->saddr,
913 .tos = RT_TOS(iph->tos) } },
914 .proto = IPPROTO_GRE };
915 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800916 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800917 tdev = rt->u.dst.dev;
918 ip_rt_put(rt);
919 }
920 dev->flags |= IFF_POINTOPOINT;
921 }
922
923 if (!tdev && tunnel->parms.link)
924 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
925
926 if (tdev) {
927 hlen = tdev->hard_header_len;
928 mtu = tdev->mtu;
929 }
930 dev->iflink = tunnel->parms.link;
931
932 /* Precalculate GRE options length */
933 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
934 if (tunnel->parms.o_flags&GRE_CSUM)
935 addend += 4;
936 if (tunnel->parms.o_flags&GRE_KEY)
937 addend += 4;
938 if (tunnel->parms.o_flags&GRE_SEQ)
939 addend += 4;
940 }
941 dev->hard_header_len = hlen + addend;
942 dev->mtu = mtu - addend;
943 tunnel->hlen = addend;
944
945}
946
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947static int
948ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
949{
950 int err = 0;
951 struct ip_tunnel_parm p;
952 struct ip_tunnel *t;
953
954 switch (cmd) {
955 case SIOCGETTUNNEL:
956 t = NULL;
957 if (dev == ipgre_fb_tunnel_dev) {
958 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
959 err = -EFAULT;
960 break;
961 }
962 t = ipgre_tunnel_locate(&p, 0);
963 }
964 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800965 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 memcpy(&p, &t->parms, sizeof(p));
967 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
968 err = -EFAULT;
969 break;
970
971 case SIOCADDTUNNEL:
972 case SIOCCHGTUNNEL:
973 err = -EPERM;
974 if (!capable(CAP_NET_ADMIN))
975 goto done;
976
977 err = -EFAULT;
978 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
979 goto done;
980
981 err = -EINVAL;
982 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
983 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
984 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
985 goto done;
986 if (p.iph.ttl)
987 p.iph.frag_off |= htons(IP_DF);
988
989 if (!(p.i_flags&GRE_KEY))
990 p.i_key = 0;
991 if (!(p.o_flags&GRE_KEY))
992 p.o_key = 0;
993
994 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
995
996 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
997 if (t != NULL) {
998 if (t->dev != dev) {
999 err = -EEXIST;
1000 break;
1001 }
1002 } else {
1003 unsigned nflags=0;
1004
Patrick McHardy2941a482006-01-08 22:05:26 -08001005 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
Joe Perchesf97c1e02007-12-16 13:45:43 -08001007 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 nflags = IFF_BROADCAST;
1009 else if (p.iph.daddr)
1010 nflags = IFF_POINTOPOINT;
1011
1012 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1013 err = -EINVAL;
1014 break;
1015 }
1016 ipgre_tunnel_unlink(t);
1017 t->parms.iph.saddr = p.iph.saddr;
1018 t->parms.iph.daddr = p.iph.daddr;
1019 t->parms.i_key = p.i_key;
1020 t->parms.o_key = p.o_key;
1021 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1022 memcpy(dev->broadcast, &p.iph.daddr, 4);
1023 ipgre_tunnel_link(t);
1024 netdev_state_change(dev);
1025 }
1026 }
1027
1028 if (t) {
1029 err = 0;
1030 if (cmd == SIOCCHGTUNNEL) {
1031 t->parms.iph.ttl = p.iph.ttl;
1032 t->parms.iph.tos = p.iph.tos;
1033 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001034 if (t->parms.link != p.link) {
1035 t->parms.link = p.link;
1036 ipgre_tunnel_bind_dev(dev);
1037 netdev_state_change(dev);
1038 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 }
1040 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1041 err = -EFAULT;
1042 } else
1043 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1044 break;
1045
1046 case SIOCDELTUNNEL:
1047 err = -EPERM;
1048 if (!capable(CAP_NET_ADMIN))
1049 goto done;
1050
1051 if (dev == ipgre_fb_tunnel_dev) {
1052 err = -EFAULT;
1053 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1054 goto done;
1055 err = -ENOENT;
1056 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1057 goto done;
1058 err = -EPERM;
Patrick McHardy2941a482006-01-08 22:05:26 -08001059 if (t == netdev_priv(ipgre_fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 goto done;
1061 dev = t->dev;
1062 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001063 unregister_netdevice(dev);
1064 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 break;
1066
1067 default:
1068 err = -EINVAL;
1069 }
1070
1071done:
1072 return err;
1073}
1074
1075static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1076{
Patrick McHardy2941a482006-01-08 22:05:26 -08001077 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078}
1079
1080static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1081{
Patrick McHardy2941a482006-01-08 22:05:26 -08001082 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1084 return -EINVAL;
1085 dev->mtu = new_mtu;
1086 return 0;
1087}
1088
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001098
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1100 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1101
1102 ping -t 255 224.66.66.66
1103
1104 If nobody answers, mbone does not work.
1105
1106 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1107 ip addr add 10.66.66.<somewhat>/24 dev Universe
1108 ifconfig Universe up
1109 ifconfig Universe add fe80::<Your_real_addr>/10
1110 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1111 ftp 10.66.66.66
1112 ...
1113 ftp fec0:6666:6666::193.233.7.65
1114 ...
1115
1116 */
1117
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001118static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1119 unsigned short type,
1120 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121{
Patrick McHardy2941a482006-01-08 22:05:26 -08001122 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001124 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125
1126 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1127 p[0] = t->parms.o_flags;
1128 p[1] = htons(type);
1129
1130 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001131 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001133
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 if (saddr)
1135 memcpy(&iph->saddr, saddr, 4);
1136
1137 if (daddr) {
1138 memcpy(&iph->daddr, daddr, 4);
1139 return t->hlen;
1140 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001141 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001143
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 return -t->hlen;
1145}
1146
Timo Teras6a5f44d2007-10-23 20:31:53 -07001147static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1148{
1149 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1150 memcpy(haddr, &iph->saddr, 4);
1151 return 4;
1152}
1153
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001154static const struct header_ops ipgre_header_ops = {
1155 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001156 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001157};
1158
Timo Teras6a5f44d2007-10-23 20:31:53 -07001159#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160static int ipgre_open(struct net_device *dev)
1161{
Patrick McHardy2941a482006-01-08 22:05:26 -08001162 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163
Joe Perchesf97c1e02007-12-16 13:45:43 -08001164 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 struct flowi fl = { .oif = t->parms.link,
1166 .nl_u = { .ip4_u =
1167 { .daddr = t->parms.iph.daddr,
1168 .saddr = t->parms.iph.saddr,
1169 .tos = RT_TOS(t->parms.iph.tos) } },
1170 .proto = IPPROTO_GRE };
1171 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -08001172 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 return -EADDRNOTAVAIL;
1174 dev = rt->u.dst.dev;
1175 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001176 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return -EADDRNOTAVAIL;
1178 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001179 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 }
1181 return 0;
1182}
1183
1184static int ipgre_close(struct net_device *dev)
1185{
Patrick McHardy2941a482006-01-08 22:05:26 -08001186 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001187 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001188 struct in_device *in_dev;
1189 in_dev = inetdev_by_index(dev->nd_net, t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 if (in_dev) {
1191 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1192 in_dev_put(in_dev);
1193 }
1194 }
1195 return 0;
1196}
1197
1198#endif
1199
1200static void ipgre_tunnel_setup(struct net_device *dev)
1201{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 dev->uninit = ipgre_tunnel_uninit;
1203 dev->destructor = free_netdev;
1204 dev->hard_start_xmit = ipgre_tunnel_xmit;
1205 dev->get_stats = ipgre_tunnel_get_stats;
1206 dev->do_ioctl = ipgre_tunnel_ioctl;
1207 dev->change_mtu = ipgre_tunnel_change_mtu;
1208
1209 dev->type = ARPHRD_IPGRE;
1210 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001211 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 dev->flags = IFF_NOARP;
1213 dev->iflink = 0;
1214 dev->addr_len = 4;
1215}
1216
1217static int ipgre_tunnel_init(struct net_device *dev)
1218{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 struct ip_tunnel *tunnel;
1220 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
Patrick McHardy2941a482006-01-08 22:05:26 -08001222 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 iph = &tunnel->parms.iph;
1224
1225 tunnel->dev = dev;
1226 strcpy(tunnel->parms.name, dev->name);
1227
1228 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1229 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1230
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001231 ipgre_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232
1233 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001235 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 if (!iph->saddr)
1237 return -EINVAL;
1238 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001239 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 dev->open = ipgre_open;
1241 dev->stop = ipgre_close;
1242 }
1243#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001244 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001245 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return 0;
1248}
1249
Adrian Bunk4b30b1c2005-11-29 16:27:20 -08001250static int __init ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251{
Patrick McHardy2941a482006-01-08 22:05:26 -08001252 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 struct iphdr *iph = &tunnel->parms.iph;
1254
1255 tunnel->dev = dev;
1256 strcpy(tunnel->parms.name, dev->name);
1257
1258 iph->version = 4;
1259 iph->protocol = IPPROTO_GRE;
1260 iph->ihl = 5;
1261 tunnel->hlen = sizeof(struct iphdr) + 4;
1262
1263 dev_hold(dev);
1264 tunnels_wc[0] = tunnel;
1265 return 0;
1266}
1267
1268
1269static struct net_protocol ipgre_protocol = {
1270 .handler = ipgre_rcv,
1271 .err_handler = ipgre_err,
1272};
1273
1274
1275/*
1276 * And now the modules code and kernel interface.
1277 */
1278
1279static int __init ipgre_init(void)
1280{
1281 int err;
1282
1283 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1284
1285 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1286 printk(KERN_INFO "ipgre init: can't add protocol\n");
1287 return -EAGAIN;
1288 }
1289
1290 ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1291 ipgre_tunnel_setup);
1292 if (!ipgre_fb_tunnel_dev) {
1293 err = -ENOMEM;
1294 goto err1;
1295 }
1296
1297 ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1298
1299 if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1300 goto err2;
1301out:
1302 return err;
1303err2:
1304 free_netdev(ipgre_fb_tunnel_dev);
1305err1:
1306 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1307 goto out;
1308}
1309
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001310static void __exit ipgre_destroy_tunnels(void)
1311{
1312 int prio;
1313
1314 for (prio = 0; prio < 4; prio++) {
1315 int h;
1316 for (h = 0; h < HASH_SIZE; h++) {
1317 struct ip_tunnel *t;
1318 while ((t = tunnels[prio][h]) != NULL)
1319 unregister_netdevice(t->dev);
1320 }
1321 }
1322}
1323
1324static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325{
1326 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1327 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1328
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001329 rtnl_lock();
1330 ipgre_destroy_tunnels();
1331 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332}
1333
1334module_init(ipgre_init);
1335module_exit(ipgre_fini);
1336MODULE_LICENSE("GPL");