blob: d729ca820931b82d7186d103ffb99c0700bcf39c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
65 skb, even if no tunneling is used.
66
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090067 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
/* Device callbacks handed to alloc_netdev() / dev->init when a tunnel is created. */
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_init(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */
static int ipgre_fb_tunnel_init(struct net_device *dev);

/*
 * Per-network-namespace bookkeeping.  The structure is still empty here.
 * NOTE(review): ipgre_net_id is presumably the netns-generic slot id;
 * its registration is not visible in this part of the file.
 */
static int ipgre_net_id;
struct ipgre_net {
};

/* Device backing the fallback tunnel (used when no configured tunnel matches). */
static struct net_device *ipgre_fb_tunnel_dev;
132
133/* Tunnel hash table */
134
135/*
136 4 hash tables:
137
138 3: (remote,local)
139 2: (remote,*)
140 1: (*,local)
141 0: (*,*)
142
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
146
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
149 */
150
/* Number of buckets per tunnel hash table; must stay a power of two. */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that expressions such as
 * HASH(a ^ b) hash the value of the whole expression.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153
154static struct ip_tunnel *tunnels[4][HASH_SIZE];
155
156#define tunnels_r_l (tunnels[3])
157#define tunnels_r (tunnels[2])
158#define tunnels_l (tunnels[1])
159#define tunnels_wc (tunnels[0])
160
161static DEFINE_RWLOCK(ipgre_lock);
162
163/* Given src, dst and key, find appropriate for input tunnel. */
164
Al Virod5a0a1e2006-11-08 00:23:14 -0800165static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166{
167 unsigned h0 = HASH(remote);
168 unsigned h1 = HASH(key);
169 struct ip_tunnel *t;
170
171 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
172 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
173 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174 return t;
175 }
176 }
177 for (t = tunnels_r[h0^h1]; t; t = t->next) {
178 if (remote == t->parms.iph.daddr) {
179 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
180 return t;
181 }
182 }
183 for (t = tunnels_l[h1]; t; t = t->next) {
184 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800185 (local == t->parms.iph.daddr &&
186 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
188 return t;
189 }
190 }
191 for (t = tunnels_wc[h1]; t; t = t->next) {
192 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193 return t;
194 }
195
196 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
Patrick McHardy2941a482006-01-08 22:05:26 -0800197 return netdev_priv(ipgre_fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 return NULL;
199}
200
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900201static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900203 __be32 remote = parms->iph.daddr;
204 __be32 local = parms->iph.saddr;
205 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 unsigned h = HASH(key);
207 int prio = 0;
208
209 if (local)
210 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800211 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 prio |= 2;
213 h ^= HASH(remote);
214 }
215
216 return &tunnels[prio][h];
217}
218
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900219static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
220{
221 return __ipgre_bucket(&t->parms);
222}
223
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224static void ipgre_tunnel_link(struct ip_tunnel *t)
225{
226 struct ip_tunnel **tp = ipgre_bucket(t);
227
228 t->next = *tp;
229 write_lock_bh(&ipgre_lock);
230 *tp = t;
231 write_unlock_bh(&ipgre_lock);
232}
233
234static void ipgre_tunnel_unlink(struct ip_tunnel *t)
235{
236 struct ip_tunnel **tp;
237
238 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
239 if (t == *tp) {
240 write_lock_bh(&ipgre_lock);
241 *tp = t->next;
242 write_unlock_bh(&ipgre_lock);
243 break;
244 }
245 }
246}
247
248static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
249{
Al Virod5a0a1e2006-11-08 00:23:14 -0800250 __be32 remote = parms->iph.daddr;
251 __be32 local = parms->iph.saddr;
252 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 struct ip_tunnel *t, **tp, *nt;
254 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 char name[IFNAMSIZ];
256
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900257 for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
259 if (key == t->parms.i_key)
260 return t;
261 }
262 }
263 if (!create)
264 return NULL;
265
266 if (parms->name[0])
267 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800268 else
269 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270
271 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
272 if (!dev)
273 return NULL;
274
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800275 if (strchr(name, '%')) {
276 if (dev_alloc_name(dev, name) < 0)
277 goto failed_free;
278 }
279
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800281 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 nt->parms = *parms;
283
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800284 if (register_netdevice(dev) < 0)
285 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 dev_hold(dev);
288 ipgre_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 return nt;
290
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800291failed_free:
292 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293 return NULL;
294}
295
/* Device teardown: take the tunnel off its hash chain and drop a device reference. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ipgre_tunnel_unlink(t);
	dev_put(dev);
}
301
302
303static void ipgre_err(struct sk_buff *skb, u32 info)
304{
305#ifndef I_WISH_WORLD_WERE_PERFECT
306
307/* It is not :-( All the routers (except for Linux) return only
308 8 bytes of packet payload. It means, that precise relaying of
309 ICMP in the real Internet is absolutely infeasible.
310
311 Moreover, Cisco "wise men" put GRE key to the third word
312 in GRE header. It makes impossible maintaining even soft state for keyed
313 GRE tunnels with enabled checksum. Tell them "thank you".
314
315 Well, I wonder, rfc1812 was written by Cisco employee,
316 what the hell these idiots break standrads established
317 by themself???
318 */
319
320 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800321 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300323 const int type = icmp_hdr(skb)->type;
324 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800326 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327
328 flags = p[0];
329 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
330 if (flags&(GRE_VERSION|GRE_ROUTING))
331 return;
332 if (flags&GRE_KEY) {
333 grehlen += 4;
334 if (flags&GRE_CSUM)
335 grehlen += 4;
336 }
337 }
338
339 /* If only 8 bytes returned, keyed message will be dropped here */
340 if (skb_headlen(skb) < grehlen)
341 return;
342
343 switch (type) {
344 default:
345 case ICMP_PARAMETERPROB:
346 return;
347
348 case ICMP_DEST_UNREACH:
349 switch (code) {
350 case ICMP_SR_FAILED:
351 case ICMP_PORT_UNREACH:
352 /* Impossible event. */
353 return;
354 case ICMP_FRAG_NEEDED:
355 /* Soft state for pmtu is maintained by IP core. */
356 return;
357 default:
358 /* All others are translated to HOST_UNREACH.
359 rfc2003 contains "deep thoughts" about NET_UNREACH,
360 I believe they are just ether pollution. --ANK
361 */
362 break;
363 }
364 break;
365 case ICMP_TIME_EXCEEDED:
366 if (code != ICMP_EXC_TTL)
367 return;
368 break;
369 }
370
371 read_lock(&ipgre_lock);
Al Virod5a0a1e2006-11-08 00:23:14 -0800372 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800373 if (t == NULL || t->parms.iph.daddr == 0 ||
374 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 goto out;
376
377 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
378 goto out;
379
380 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
381 t->err_count++;
382 else
383 t->err_count = 1;
384 t->err_time = jiffies;
385out:
386 read_unlock(&ipgre_lock);
387 return;
388#else
389 struct iphdr *iph = (struct iphdr*)dp;
390 struct iphdr *eiph;
Al Virod5a0a1e2006-11-08 00:23:14 -0800391 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300392 const int type = icmp_hdr(skb)->type;
393 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 int rel_type = 0;
395 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700396 __be32 rel_info = 0;
397 __u32 n = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800398 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 int grehlen = (iph->ihl<<2) + 4;
400 struct sk_buff *skb2;
401 struct flowi fl;
402 struct rtable *rt;
403
404 if (p[1] != htons(ETH_P_IP))
405 return;
406
407 flags = p[0];
408 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
409 if (flags&(GRE_VERSION|GRE_ROUTING))
410 return;
411 if (flags&GRE_CSUM)
412 grehlen += 4;
413 if (flags&GRE_KEY)
414 grehlen += 4;
415 if (flags&GRE_SEQ)
416 grehlen += 4;
417 }
418 if (len < grehlen + sizeof(struct iphdr))
419 return;
420 eiph = (struct iphdr*)(dp + grehlen);
421
422 switch (type) {
423 default:
424 return;
425 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300426 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700427 if (n < (iph->ihl<<2))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 return;
429
430 /* So... This guy found something strange INSIDE encapsulated
431 packet. Well, he is fool, but what can we do ?
432 */
433 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700434 n -= grehlen;
435 rel_info = htonl(n << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 break;
437
438 case ICMP_DEST_UNREACH:
439 switch (code) {
440 case ICMP_SR_FAILED:
441 case ICMP_PORT_UNREACH:
442 /* Impossible event. */
443 return;
444 case ICMP_FRAG_NEEDED:
445 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300446 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700447 if (n < grehlen+68)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700449 n -= grehlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700451 if (n > ntohs(eiph->tot_len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 return;
Al Viroc55e2f42006-09-19 13:23:19 -0700453 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 break;
455 default:
456 /* All others are translated to HOST_UNREACH.
457 rfc2003 contains "deep thoughts" about NET_UNREACH,
458 I believe, it is just ether pollution. --ANK
459 */
460 rel_type = ICMP_DEST_UNREACH;
461 rel_code = ICMP_HOST_UNREACH;
462 break;
463 }
464 break;
465 case ICMP_TIME_EXCEEDED:
466 if (code != ICMP_EXC_TTL)
467 return;
468 break;
469 }
470
471 /* Prepare fake skb to feed it to icmp_send */
472 skb2 = skb_clone(skb, GFP_ATOMIC);
473 if (skb2 == NULL)
474 return;
475 dst_release(skb2->dst);
476 skb2->dst = NULL;
477 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700478 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479
480 /* Try to guess incoming interface */
481 memset(&fl, 0, sizeof(fl));
482 fl.fl4_dst = eiph->saddr;
483 fl.fl4_tos = RT_TOS(eiph->tos);
484 fl.proto = IPPROTO_GRE;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800485 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 kfree_skb(skb2);
487 return;
488 }
489 skb2->dev = rt->u.dst.dev;
490
491 /* route "incoming" packet */
492 if (rt->rt_flags&RTCF_LOCAL) {
493 ip_rt_put(rt);
494 rt = NULL;
495 fl.fl4_dst = eiph->daddr;
496 fl.fl4_src = eiph->saddr;
497 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800498 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 rt->u.dst.dev->type != ARPHRD_IPGRE) {
500 ip_rt_put(rt);
501 kfree_skb(skb2);
502 return;
503 }
504 } else {
505 ip_rt_put(rt);
506 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
507 skb2->dst->dev->type != ARPHRD_IPGRE) {
508 kfree_skb(skb2);
509 return;
510 }
511 }
512
513 /* change mtu on this route */
514 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700515 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 kfree_skb(skb2);
517 return;
518 }
Al Viroc55e2f42006-09-19 13:23:19 -0700519 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800521 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 if (t->parms.iph.ttl) {
523 rel_type = ICMP_DEST_UNREACH;
524 rel_code = ICMP_HOST_UNREACH;
525 }
526 }
527
528 icmp_send(skb2, rel_type, rel_code, rel_info);
529 kfree_skb(skb2);
530#endif
531}
532
533static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
534{
535 if (INET_ECN_is_ce(iph->tos)) {
536 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700537 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700539 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 }
541 }
542}
543
544static inline u8
545ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
546{
547 u8 inner = 0;
548 if (skb->protocol == htons(ETH_P_IP))
549 inner = old_iph->tos;
550 else if (skb->protocol == htons(ETH_P_IPV6))
551 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
552 return INET_ECN_encapsulate(tos, inner);
553}
554
555static int ipgre_rcv(struct sk_buff *skb)
556{
557 struct iphdr *iph;
558 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800559 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800560 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800561 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 u32 seqno = 0;
563 struct ip_tunnel *tunnel;
564 int offset = 4;
565
566 if (!pskb_may_pull(skb, 16))
567 goto drop_nolock;
568
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700569 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800571 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572
573 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
574 /* - Version must be 0.
575 - We do not support routing headers.
576 */
577 if (flags&(GRE_VERSION|GRE_ROUTING))
578 goto drop_nolock;
579
580 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800581 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700582 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800583 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800584 if (!csum)
585 break;
586 /* fall through */
587 case CHECKSUM_NONE:
588 skb->csum = 0;
589 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700590 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 }
592 offset += 4;
593 }
594 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800595 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 offset += 4;
597 }
598 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800599 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 offset += 4;
601 }
602 }
603
604 read_lock(&ipgre_lock);
605 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
606 secpath_reset(skb);
607
Al Virod5a0a1e2006-11-08 00:23:14 -0800608 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 /* WCCP version 1 and 2 protocol decoding.
610 * - Change protocol to IP
611 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
612 */
613 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700614 skb->protocol == htons(ETH_P_WCCP)) {
615 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900616 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 offset += 4;
618 }
619
Timo Teras1d069162007-12-20 00:10:33 -0800620 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300621 __pskb_pull(skb, offset);
622 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700623 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 skb->pkt_type = PACKET_HOST;
625#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800626 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800628 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 goto drop;
630 tunnel->stat.multicast++;
631 skb->pkt_type = PACKET_BROADCAST;
632 }
633#endif
634
635 if (((flags&GRE_CSUM) && csum) ||
636 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
637 tunnel->stat.rx_crc_errors++;
638 tunnel->stat.rx_errors++;
639 goto drop;
640 }
641 if (tunnel->parms.i_flags&GRE_SEQ) {
642 if (!(flags&GRE_SEQ) ||
643 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
644 tunnel->stat.rx_fifo_errors++;
645 tunnel->stat.rx_errors++;
646 goto drop;
647 }
648 tunnel->i_seqno = seqno + 1;
649 }
650 tunnel->stat.rx_packets++;
651 tunnel->stat.rx_bytes += skb->len;
652 skb->dev = tunnel->dev;
653 dst_release(skb->dst);
654 skb->dst = NULL;
655 nf_reset(skb);
656 ipgre_ecn_decapsulate(iph, skb);
657 netif_rx(skb);
658 read_unlock(&ipgre_lock);
659 return(0);
660 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700661 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
663drop:
664 read_unlock(&ipgre_lock);
665drop_nolock:
666 kfree_skb(skb);
667 return(0);
668}
669
670static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
671{
Patrick McHardy2941a482006-01-08 22:05:26 -0800672 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 struct net_device_stats *stats = &tunnel->stat;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700674 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 struct iphdr *tiph;
676 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800677 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678 struct rtable *rt; /* Route to the other host */
679 struct net_device *tdev; /* Device to other host */
680 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700681 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800683 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 int mtu;
685
686 if (tunnel->recursion++) {
687 tunnel->stat.collisions++;
688 goto tx_error;
689 }
690
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700691 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 gre_hlen = 0;
693 tiph = (struct iphdr*)skb->data;
694 } else {
695 gre_hlen = tunnel->hlen;
696 tiph = &tunnel->parms.iph;
697 }
698
699 if ((dst = tiph->daddr) == 0) {
700 /* NBMA tunnel */
701
702 if (skb->dst == NULL) {
703 tunnel->stat.tx_fifo_errors++;
704 goto tx_error;
705 }
706
707 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800708 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 if ((dst = rt->rt_gateway) == 0)
710 goto tx_error_icmp;
711 }
712#ifdef CONFIG_IPV6
713 else if (skb->protocol == htons(ETH_P_IPV6)) {
714 struct in6_addr *addr6;
715 int addr_type;
716 struct neighbour *neigh = skb->dst->neighbour;
717
718 if (neigh == NULL)
719 goto tx_error;
720
721 addr6 = (struct in6_addr*)&neigh->primary_key;
722 addr_type = ipv6_addr_type(addr6);
723
724 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700725 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 addr_type = ipv6_addr_type(addr6);
727 }
728
729 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
730 goto tx_error_icmp;
731
732 dst = addr6->s6_addr32[3];
733 }
734#endif
735 else
736 goto tx_error;
737 }
738
739 tos = tiph->tos;
740 if (tos&1) {
741 if (skb->protocol == htons(ETH_P_IP))
742 tos = old_iph->tos;
743 tos &= ~1;
744 }
745
746 {
747 struct flowi fl = { .oif = tunnel->parms.link,
748 .nl_u = { .ip4_u =
749 { .daddr = dst,
750 .saddr = tiph->saddr,
751 .tos = RT_TOS(tos) } },
752 .proto = IPPROTO_GRE };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800753 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 tunnel->stat.tx_carrier_errors++;
755 goto tx_error;
756 }
757 }
758 tdev = rt->u.dst.dev;
759
760 if (tdev == dev) {
761 ip_rt_put(rt);
762 tunnel->stat.collisions++;
763 goto tx_error;
764 }
765
766 df = tiph->frag_off;
767 if (df)
768 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
769 else
770 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
771
772 if (skb->dst)
773 skb->dst->ops->update_pmtu(skb->dst, mtu);
774
775 if (skb->protocol == htons(ETH_P_IP)) {
776 df |= (old_iph->frag_off&htons(IP_DF));
777
778 if ((old_iph->frag_off&htons(IP_DF)) &&
779 mtu < ntohs(old_iph->tot_len)) {
780 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
781 ip_rt_put(rt);
782 goto tx_error;
783 }
784 }
785#ifdef CONFIG_IPV6
786 else if (skb->protocol == htons(ETH_P_IPV6)) {
787 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
788
789 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800790 if ((tunnel->parms.iph.daddr &&
791 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 rt6->rt6i_dst.plen == 128) {
793 rt6->rt6i_flags |= RTF_MODIFIED;
794 skb->dst->metrics[RTAX_MTU-1] = mtu;
795 }
796 }
797
798 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
799 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
800 ip_rt_put(rt);
801 goto tx_error;
802 }
803 }
804#endif
805
806 if (tunnel->err_count > 0) {
807 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
808 tunnel->err_count--;
809
810 dst_link_failure(skb);
811 } else
812 tunnel->err_count = 0;
813 }
814
815 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
816
Patrick McHardycfbba492007-07-09 15:33:40 -0700817 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
818 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
820 if (!new_skb) {
821 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900822 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 dev_kfree_skb(skb);
824 tunnel->recursion--;
825 return 0;
826 }
827 if (skb->sk)
828 skb_set_owner_w(new_skb, skb->sk);
829 dev_kfree_skb(skb);
830 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700831 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 }
833
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700834 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700835 skb_push(skb, gre_hlen);
836 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800838 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
839 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 dst_release(skb->dst);
841 skb->dst = &rt->u.dst;
842
843 /*
844 * Push down and install the IPIP header.
845 */
846
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700847 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 iph->version = 4;
849 iph->ihl = sizeof(struct iphdr) >> 2;
850 iph->frag_off = df;
851 iph->protocol = IPPROTO_GRE;
852 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
853 iph->daddr = rt->rt_dst;
854 iph->saddr = rt->rt_src;
855
856 if ((iph->ttl = tiph->ttl) == 0) {
857 if (skb->protocol == htons(ETH_P_IP))
858 iph->ttl = old_iph->ttl;
859#ifdef CONFIG_IPV6
860 else if (skb->protocol == htons(ETH_P_IPV6))
861 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
862#endif
863 else
864 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
865 }
866
Al Virod5a0a1e2006-11-08 00:23:14 -0800867 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
868 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869
870 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800871 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
873 if (tunnel->parms.o_flags&GRE_SEQ) {
874 ++tunnel->o_seqno;
875 *ptr = htonl(tunnel->o_seqno);
876 ptr--;
877 }
878 if (tunnel->parms.o_flags&GRE_KEY) {
879 *ptr = tunnel->parms.o_key;
880 ptr--;
881 }
882 if (tunnel->parms.o_flags&GRE_CSUM) {
883 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800884 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 }
886 }
887
888 nf_reset(skb);
889
890 IPTUNNEL_XMIT();
891 tunnel->recursion--;
892 return 0;
893
894tx_error_icmp:
895 dst_link_failure(skb);
896
897tx_error:
898 stats->tx_errors++;
899 dev_kfree_skb(skb);
900 tunnel->recursion--;
901 return 0;
902}
903
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800904static void ipgre_tunnel_bind_dev(struct net_device *dev)
905{
906 struct net_device *tdev = NULL;
907 struct ip_tunnel *tunnel;
908 struct iphdr *iph;
909 int hlen = LL_MAX_HEADER;
910 int mtu = ETH_DATA_LEN;
911 int addend = sizeof(struct iphdr) + 4;
912
913 tunnel = netdev_priv(dev);
914 iph = &tunnel->parms.iph;
915
916 /* Guess output device to choose reasonable mtu and hard_header_len */
917
918 if (iph->daddr) {
919 struct flowi fl = { .oif = tunnel->parms.link,
920 .nl_u = { .ip4_u =
921 { .daddr = iph->daddr,
922 .saddr = iph->saddr,
923 .tos = RT_TOS(iph->tos) } },
924 .proto = IPPROTO_GRE };
925 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800926 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800927 tdev = rt->u.dst.dev;
928 ip_rt_put(rt);
929 }
930 dev->flags |= IFF_POINTOPOINT;
931 }
932
933 if (!tdev && tunnel->parms.link)
934 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
935
936 if (tdev) {
937 hlen = tdev->hard_header_len;
938 mtu = tdev->mtu;
939 }
940 dev->iflink = tunnel->parms.link;
941
942 /* Precalculate GRE options length */
943 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
944 if (tunnel->parms.o_flags&GRE_CSUM)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_KEY)
947 addend += 4;
948 if (tunnel->parms.o_flags&GRE_SEQ)
949 addend += 4;
950 }
951 dev->hard_header_len = hlen + addend;
952 dev->mtu = mtu - addend;
953 tunnel->hlen = addend;
954
955}
956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957static int
958ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
959{
960 int err = 0;
961 struct ip_tunnel_parm p;
962 struct ip_tunnel *t;
963
964 switch (cmd) {
965 case SIOCGETTUNNEL:
966 t = NULL;
967 if (dev == ipgre_fb_tunnel_dev) {
968 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
969 err = -EFAULT;
970 break;
971 }
972 t = ipgre_tunnel_locate(&p, 0);
973 }
974 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800975 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 memcpy(&p, &t->parms, sizeof(p));
977 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
978 err = -EFAULT;
979 break;
980
981 case SIOCADDTUNNEL:
982 case SIOCCHGTUNNEL:
983 err = -EPERM;
984 if (!capable(CAP_NET_ADMIN))
985 goto done;
986
987 err = -EFAULT;
988 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
989 goto done;
990
991 err = -EINVAL;
992 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
993 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
994 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
995 goto done;
996 if (p.iph.ttl)
997 p.iph.frag_off |= htons(IP_DF);
998
999 if (!(p.i_flags&GRE_KEY))
1000 p.i_key = 0;
1001 if (!(p.o_flags&GRE_KEY))
1002 p.o_key = 0;
1003
1004 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
1005
1006 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1007 if (t != NULL) {
1008 if (t->dev != dev) {
1009 err = -EEXIST;
1010 break;
1011 }
1012 } else {
1013 unsigned nflags=0;
1014
Patrick McHardy2941a482006-01-08 22:05:26 -08001015 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016
Joe Perchesf97c1e02007-12-16 13:45:43 -08001017 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 nflags = IFF_BROADCAST;
1019 else if (p.iph.daddr)
1020 nflags = IFF_POINTOPOINT;
1021
1022 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1023 err = -EINVAL;
1024 break;
1025 }
1026 ipgre_tunnel_unlink(t);
1027 t->parms.iph.saddr = p.iph.saddr;
1028 t->parms.iph.daddr = p.iph.daddr;
1029 t->parms.i_key = p.i_key;
1030 t->parms.o_key = p.o_key;
1031 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1032 memcpy(dev->broadcast, &p.iph.daddr, 4);
1033 ipgre_tunnel_link(t);
1034 netdev_state_change(dev);
1035 }
1036 }
1037
1038 if (t) {
1039 err = 0;
1040 if (cmd == SIOCCHGTUNNEL) {
1041 t->parms.iph.ttl = p.iph.ttl;
1042 t->parms.iph.tos = p.iph.tos;
1043 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001044 if (t->parms.link != p.link) {
1045 t->parms.link = p.link;
1046 ipgre_tunnel_bind_dev(dev);
1047 netdev_state_change(dev);
1048 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 }
1050 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1051 err = -EFAULT;
1052 } else
1053 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1054 break;
1055
1056 case SIOCDELTUNNEL:
1057 err = -EPERM;
1058 if (!capable(CAP_NET_ADMIN))
1059 goto done;
1060
1061 if (dev == ipgre_fb_tunnel_dev) {
1062 err = -EFAULT;
1063 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1064 goto done;
1065 err = -ENOENT;
1066 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1067 goto done;
1068 err = -EPERM;
Patrick McHardy2941a482006-01-08 22:05:26 -08001069 if (t == netdev_priv(ipgre_fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 goto done;
1071 dev = t->dev;
1072 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001073 unregister_netdevice(dev);
1074 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 break;
1076
1077 default:
1078 err = -EINVAL;
1079 }
1080
1081done:
1082 return err;
1083}
1084
1085static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1086{
Patrick McHardy2941a482006-01-08 22:05:26 -08001087 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088}
1089
1090static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1091{
Patrick McHardy2941a482006-01-08 22:05:26 -08001092 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1094 return -EINVAL;
1095 dev->mtu = new_mtu;
1096 return 0;
1097}
1098
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1127
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001128static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1129 unsigned short type,
1130 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131{
Patrick McHardy2941a482006-01-08 22:05:26 -08001132 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001134 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135
1136 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1137 p[0] = t->parms.o_flags;
1138 p[1] = htons(type);
1139
1140 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001141 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001143
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 if (saddr)
1145 memcpy(&iph->saddr, saddr, 4);
1146
1147 if (daddr) {
1148 memcpy(&iph->daddr, daddr, 4);
1149 return t->hlen;
1150 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001151 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001153
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 return -t->hlen;
1155}
1156
Timo Teras6a5f44d2007-10-23 20:31:53 -07001157static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1158{
1159 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1160 memcpy(haddr, &iph->saddr, 4);
1161 return 4;
1162}
1163
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001164static const struct header_ops ipgre_header_ops = {
1165 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001166 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001167};
1168
Timo Teras6a5f44d2007-10-23 20:31:53 -07001169#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170static int ipgre_open(struct net_device *dev)
1171{
Patrick McHardy2941a482006-01-08 22:05:26 -08001172 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
Joe Perchesf97c1e02007-12-16 13:45:43 -08001174 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 struct flowi fl = { .oif = t->parms.link,
1176 .nl_u = { .ip4_u =
1177 { .daddr = t->parms.iph.daddr,
1178 .saddr = t->parms.iph.saddr,
1179 .tos = RT_TOS(t->parms.iph.tos) } },
1180 .proto = IPPROTO_GRE };
1181 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -08001182 if (ip_route_output_key(&init_net, &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 return -EADDRNOTAVAIL;
1184 dev = rt->u.dst.dev;
1185 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001186 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 return -EADDRNOTAVAIL;
1188 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001189 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 }
1191 return 0;
1192}
1193
1194static int ipgre_close(struct net_device *dev)
1195{
Patrick McHardy2941a482006-01-08 22:05:26 -08001196 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001197 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001198 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001199 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 if (in_dev) {
1201 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1202 in_dev_put(in_dev);
1203 }
1204 }
1205 return 0;
1206}
1207
1208#endif
1209
1210static void ipgre_tunnel_setup(struct net_device *dev)
1211{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 dev->uninit = ipgre_tunnel_uninit;
1213 dev->destructor = free_netdev;
1214 dev->hard_start_xmit = ipgre_tunnel_xmit;
1215 dev->get_stats = ipgre_tunnel_get_stats;
1216 dev->do_ioctl = ipgre_tunnel_ioctl;
1217 dev->change_mtu = ipgre_tunnel_change_mtu;
1218
1219 dev->type = ARPHRD_IPGRE;
1220 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001221 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 dev->flags = IFF_NOARP;
1223 dev->iflink = 0;
1224 dev->addr_len = 4;
1225}
1226
1227static int ipgre_tunnel_init(struct net_device *dev)
1228{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 struct ip_tunnel *tunnel;
1230 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Patrick McHardy2941a482006-01-08 22:05:26 -08001232 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 iph = &tunnel->parms.iph;
1234
1235 tunnel->dev = dev;
1236 strcpy(tunnel->parms.name, dev->name);
1237
1238 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1239 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1240
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001241 ipgre_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242
1243 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001245 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 if (!iph->saddr)
1247 return -EINVAL;
1248 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001249 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 dev->open = ipgre_open;
1251 dev->stop = ipgre_close;
1252 }
1253#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001254 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001255 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 return 0;
1258}
1259
Adrian Bunk4b30b1c2005-11-29 16:27:20 -08001260static int __init ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261{
Patrick McHardy2941a482006-01-08 22:05:26 -08001262 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263 struct iphdr *iph = &tunnel->parms.iph;
1264
1265 tunnel->dev = dev;
1266 strcpy(tunnel->parms.name, dev->name);
1267
1268 iph->version = 4;
1269 iph->protocol = IPPROTO_GRE;
1270 iph->ihl = 5;
1271 tunnel->hlen = sizeof(struct iphdr) + 4;
1272
1273 dev_hold(dev);
1274 tunnels_wc[0] = tunnel;
1275 return 0;
1276}
1277
1278
1279static struct net_protocol ipgre_protocol = {
1280 .handler = ipgre_rcv,
1281 .err_handler = ipgre_err,
1282};
1283
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001284static int ipgre_init_net(struct net *net)
1285{
1286 int err;
1287 struct ipgre_net *ign;
1288
1289 err = -ENOMEM;
1290 ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1291 if (ign == NULL)
1292 goto err_alloc;
1293
1294 err = net_assign_generic(net, ipgre_net_id, ign);
1295 if (err < 0)
1296 goto err_assign;
1297
1298 return 0;
1299
1300err_assign:
1301 kfree(ign);
1302err_alloc:
1303 return err;
1304}
1305
1306static void ipgre_exit_net(struct net *net)
1307{
1308 struct ipgre_net *ign;
1309
1310 ign = net_generic(net, ipgre_net_id);
1311 kfree(ign);
1312}
1313
1314static struct pernet_operations ipgre_net_ops = {
1315 .init = ipgre_init_net,
1316 .exit = ipgre_exit_net,
1317};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318
1319/*
1320 * And now the modules code and kernel interface.
1321 */
1322
1323static int __init ipgre_init(void)
1324{
1325 int err;
1326
1327 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1328
1329 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1330 printk(KERN_INFO "ipgre init: can't add protocol\n");
1331 return -EAGAIN;
1332 }
1333
1334 ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1335 ipgre_tunnel_setup);
1336 if (!ipgre_fb_tunnel_dev) {
1337 err = -ENOMEM;
1338 goto err1;
1339 }
1340
1341 ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1342
1343 if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1344 goto err2;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001345
1346 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1347 if (err < 0)
1348 goto err3;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349out:
1350 return err;
1351err2:
1352 free_netdev(ipgre_fb_tunnel_dev);
1353err1:
1354 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1355 goto out;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001356err3:
1357 unregister_netdevice(ipgre_fb_tunnel_dev);
1358 goto err1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359}
1360
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001361static void __exit ipgre_destroy_tunnels(void)
1362{
1363 int prio;
1364
1365 for (prio = 0; prio < 4; prio++) {
1366 int h;
1367 for (h = 0; h < HASH_SIZE; h++) {
1368 struct ip_tunnel *t;
1369 while ((t = tunnels[prio][h]) != NULL)
1370 unregister_netdevice(t->dev);
1371 }
1372 }
1373}
1374
1375static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376{
1377 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1378 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1379
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001380 rtnl_lock();
1381 ipgre_destroy_tunnels();
1382 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001383
1384 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385}
1386
1387module_init(ipgre_init);
1388module_exit(ipgre_fini);
1389MODULE_LICENSE("GPL");