blob: 4342cba4ff823bbddfe053d9583f31b9521d735d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
51/*
52 Problems & solutions
53 --------------------
54
55 1. The most important issue is detecting local dead loops.
56 They would cause complete host lockup in transmit, which
57 would be "resolved" by stack overflow or, if queueing is enabled,
58 with infinite looping in net_bh.
59
60 We cannot track such dead loops during route installation,
61 it is infeasible task. The most general solutions would be
62 to keep skb->encapsulation counter (sort of local ttl),
63 and silently drop packet when it expires. It is the best
64 solution, but it supposes maintaining new variable in ALL
65 skb, even if no tunneling is used.
66
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090067 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 like dev->tbusy flag, but I preferred new variable, because
69 the semantics is different. One day, when hard_start_xmit
70 will be multithreaded we will have to use skb->encapsulation.
71
72
73
74 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case,
76 if we copy it from packet being encapsulated to upper header.
77 It is very good solution, but it introduces two problems:
78
79 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80 do not work over tunnels.
81 - traceroute does not work. I planned to relay ICMP from tunnel,
82 so that this problem would be solved and traceroute output
83 would even more informative. This idea appeared to be wrong:
84 only Linux complies to rfc1812 now (yes, guys, Linux is the only
85 true router now :-)), all routers (at least, in neighbourhood of mine)
86 return only 8 bytes of payload. It is the end.
87
88 Hence, if we want that OSPF worked or traceroute said something reasonable,
89 we should search for another solution.
90
91 One of them is to parse packet trying to detect inner encapsulation
92 made by our node. It is difficult or even impossible, especially,
93 taking into account fragmentation. To be short, it is not a solution at all.
94
95 Current solution: The solution was UNEXPECTEDLY SIMPLE.
96 We force DF flag on tunnels with preconfigured hop limit,
97 that is ALL. :-) Well, it does not remove the problem completely,
98 but exponential growth of network traffic is changed to linear
99 (branches, that exceed pmtu are pruned) and tunnel mtu
100 fastly degrades to value <68, where looping stops.
101 Yes, it is not good if there exists a router in the loop,
102 which does not force DF, even when encapsulating packets have DF set.
103 But it is not our problem! Nobody could accuse us, we made
104 all that we could make. Even if it is your gated who injected
105 fatal route to network, even if it were you who configured
106 fatal static route: you are innocent. :-)
107
108
109
110 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111 practically identical code. It would be good to glue them
112 together, but it is not very evident, how to make them modular.
113 sit is integral part of IPv6, ipip and gre are naturally modular.
114 We could extract common parts (hash table, ioctl etc)
115 to a separate module (ip_tunnel.c).
116
117 Alexey Kuznetsov.
118 */
119
/* Forward declarations of device callbacks referenced before their definitions. */
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/* Id handed to net_generic() to obtain the per-namespace struct ipgre_net. */
static int ipgre_net_id;

/* GRE state kept per network namespace. */
struct ipgre_net {
	/* Four hash tables (see the tunnels_* aliases), HASH_SIZE chains each. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Fallback tunnel device; receives packets no other tunnel matched. */
	struct net_device *fb_tunnel_dev;
};
135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136/* Tunnel hash table */
137
138/*
139 4 hash tables:
140
141 3: (remote,local)
142 2: (remote,*)
143 1: (*,local)
144 0: (*,*)
145
146 We require exact key match i.e. if a key is present in packet
147 it will match only tunnel with the same key; if it is not present,
148 it will match only keyless tunnel.
149
150 All keyless packets, if not matched configured keyless tunnels
151 will match fallback tunnel.
152 */
153
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that an expression argument
 * (e.g. HASH(a | b)) is hashed as a whole instead of being torn apart by
 * operator precedence.  The mask is derived from HASH_SIZE so the index
 * can never exceed the table dimension; HASH_SIZE must be a power of two.
 * Note that the argument is evaluated twice.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155
/* Aliases for the four tables inside struct ipgre_net, by what they match on. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*) */
#define tunnels_l	tunnels[1]	/* (*,local) */
#define tunnels_wc	tunnels[0]	/* (*,*) */

/* Read-held around lookups, write-held while a chain pointer is rewritten. */
static DEFINE_RWLOCK(ipgre_lock);
162
163/* Given src, dst and key, find appropriate for input tunnel. */
164
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700165static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166 __be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167{
168 unsigned h0 = HASH(remote);
169 unsigned h1 = HASH(key);
170 struct ip_tunnel *t;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700171 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700173 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176 return t;
177 }
178 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700179 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 if (remote == t->parms.iph.daddr) {
181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182 return t;
183 }
184 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700185 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800187 (local == t->parms.iph.daddr &&
188 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190 return t;
191 }
192 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700193 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195 return t;
196 }
197
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700198 if (ign->fb_tunnel_dev->flags&IFF_UP)
199 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 return NULL;
201}
202
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700203static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900206 __be32 remote = parms->iph.daddr;
207 __be32 local = parms->iph.saddr;
208 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 unsigned h = HASH(key);
210 int prio = 0;
211
212 if (local)
213 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800214 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 prio |= 2;
216 h ^= HASH(remote);
217 }
218
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700219 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220}
221
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700222static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900224{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700225 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900226}
227
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700228static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700230 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
232 t->next = *tp;
233 write_lock_bh(&ipgre_lock);
234 *tp = t;
235 write_unlock_bh(&ipgre_lock);
236}
237
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700238static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239{
240 struct ip_tunnel **tp;
241
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700242 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 if (t == *tp) {
244 write_lock_bh(&ipgre_lock);
245 *tp = t->next;
246 write_unlock_bh(&ipgre_lock);
247 break;
248 }
249 }
250}
251
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700252static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253 struct ip_tunnel_parm *parms, int create)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
Al Virod5a0a1e2006-11-08 00:23:14 -0800255 __be32 remote = parms->iph.daddr;
256 __be32 local = parms->iph.saddr;
257 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 struct ip_tunnel *t, **tp, *nt;
259 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700261 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700263 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265 if (key == t->parms.i_key)
266 return t;
267 }
268 }
269 if (!create)
270 return NULL;
271
272 if (parms->name[0])
273 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800274 else
275 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276
277 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278 if (!dev)
279 return NULL;
280
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700281 dev_net_set(dev, net);
282
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800283 if (strchr(name, '%')) {
284 if (dev_alloc_name(dev, name) < 0)
285 goto failed_free;
286 }
287
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800289 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 nt->parms = *parms;
291
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800292 if (register_netdevice(dev) < 0)
293 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700296 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 return nt;
298
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800299failed_free:
300 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 return NULL;
302}
303
304static void ipgre_tunnel_uninit(struct net_device *dev)
305{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700306 struct net *net = dev_net(dev);
307 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308
309 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 dev_put(dev);
311}
312
313
314static void ipgre_err(struct sk_buff *skb, u32 info)
315{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
Rami Rosen071f92d2008-05-21 17:47:54 -0700317/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 8 bytes of packet payload. It means, that precise relaying of
319 ICMP in the real Internet is absolutely infeasible.
320
321 Moreover, Cisco "wise men" put GRE key to the third word
322 in GRE header. It makes impossible maintaining even soft state for keyed
323 GRE tunnels with enabled checksum. Tell them "thank you".
324
325 Well, I wonder, rfc1812 was written by Cisco employee,
326 what the hell these idiots break standrads established
327 by themself???
328 */
329
330 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800331 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300333 const int type = icmp_hdr(skb)->type;
334 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800336 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337
338 flags = p[0];
339 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
340 if (flags&(GRE_VERSION|GRE_ROUTING))
341 return;
342 if (flags&GRE_KEY) {
343 grehlen += 4;
344 if (flags&GRE_CSUM)
345 grehlen += 4;
346 }
347 }
348
349 /* If only 8 bytes returned, keyed message will be dropped here */
350 if (skb_headlen(skb) < grehlen)
351 return;
352
353 switch (type) {
354 default:
355 case ICMP_PARAMETERPROB:
356 return;
357
358 case ICMP_DEST_UNREACH:
359 switch (code) {
360 case ICMP_SR_FAILED:
361 case ICMP_PORT_UNREACH:
362 /* Impossible event. */
363 return;
364 case ICMP_FRAG_NEEDED:
365 /* Soft state for pmtu is maintained by IP core. */
366 return;
367 default:
368 /* All others are translated to HOST_UNREACH.
369 rfc2003 contains "deep thoughts" about NET_UNREACH,
370 I believe they are just ether pollution. --ANK
371 */
372 break;
373 }
374 break;
375 case ICMP_TIME_EXCEEDED:
376 if (code != ICMP_EXC_TTL)
377 return;
378 break;
379 }
380
381 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700382 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700383 (flags&GRE_KEY) ?
384 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800385 if (t == NULL || t->parms.iph.daddr == 0 ||
386 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 goto out;
388
389 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
390 goto out;
391
392 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
393 t->err_count++;
394 else
395 t->err_count = 1;
396 t->err_time = jiffies;
397out:
398 read_unlock(&ipgre_lock);
399 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400}
401
402static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
403{
404 if (INET_ECN_is_ce(iph->tos)) {
405 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700406 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700408 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 }
410 }
411}
412
413static inline u8
414ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
415{
416 u8 inner = 0;
417 if (skb->protocol == htons(ETH_P_IP))
418 inner = old_iph->tos;
419 else if (skb->protocol == htons(ETH_P_IPV6))
420 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
421 return INET_ECN_encapsulate(tos, inner);
422}
423
424static int ipgre_rcv(struct sk_buff *skb)
425{
426 struct iphdr *iph;
427 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800428 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800429 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800430 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 u32 seqno = 0;
432 struct ip_tunnel *tunnel;
433 int offset = 4;
434
435 if (!pskb_may_pull(skb, 16))
436 goto drop_nolock;
437
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700438 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800440 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
442 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
443 /* - Version must be 0.
444 - We do not support routing headers.
445 */
446 if (flags&(GRE_VERSION|GRE_ROUTING))
447 goto drop_nolock;
448
449 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800450 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700451 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800452 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800453 if (!csum)
454 break;
455 /* fall through */
456 case CHECKSUM_NONE:
457 skb->csum = 0;
458 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700459 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 }
461 offset += 4;
462 }
463 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800464 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 offset += 4;
466 }
467 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800468 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 offset += 4;
470 }
471 }
472
473 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700474 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700475 iph->saddr, iph->daddr, key)) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 secpath_reset(skb);
477
Al Virod5a0a1e2006-11-08 00:23:14 -0800478 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 /* WCCP version 1 and 2 protocol decoding.
480 * - Change protocol to IP
481 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
482 */
483 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700484 skb->protocol == htons(ETH_P_WCCP)) {
485 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900486 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 offset += 4;
488 }
489
Timo Teras1d069162007-12-20 00:10:33 -0800490 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300491 __pskb_pull(skb, offset);
492 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700493 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 skb->pkt_type = PACKET_HOST;
495#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800496 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800498 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 goto drop;
500 tunnel->stat.multicast++;
501 skb->pkt_type = PACKET_BROADCAST;
502 }
503#endif
504
505 if (((flags&GRE_CSUM) && csum) ||
506 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
507 tunnel->stat.rx_crc_errors++;
508 tunnel->stat.rx_errors++;
509 goto drop;
510 }
511 if (tunnel->parms.i_flags&GRE_SEQ) {
512 if (!(flags&GRE_SEQ) ||
513 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
514 tunnel->stat.rx_fifo_errors++;
515 tunnel->stat.rx_errors++;
516 goto drop;
517 }
518 tunnel->i_seqno = seqno + 1;
519 }
520 tunnel->stat.rx_packets++;
521 tunnel->stat.rx_bytes += skb->len;
522 skb->dev = tunnel->dev;
523 dst_release(skb->dst);
524 skb->dst = NULL;
525 nf_reset(skb);
526 ipgre_ecn_decapsulate(iph, skb);
527 netif_rx(skb);
528 read_unlock(&ipgre_lock);
529 return(0);
530 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700531 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533drop:
534 read_unlock(&ipgre_lock);
535drop_nolock:
536 kfree_skb(skb);
537 return(0);
538}
539
540static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
541{
Patrick McHardy2941a482006-01-08 22:05:26 -0800542 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 struct net_device_stats *stats = &tunnel->stat;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700544 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 struct iphdr *tiph;
546 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800547 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 struct rtable *rt; /* Route to the other host */
549 struct net_device *tdev; /* Device to other host */
550 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700551 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800553 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 int mtu;
555
556 if (tunnel->recursion++) {
557 tunnel->stat.collisions++;
558 goto tx_error;
559 }
560
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700561 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 gre_hlen = 0;
563 tiph = (struct iphdr*)skb->data;
564 } else {
565 gre_hlen = tunnel->hlen;
566 tiph = &tunnel->parms.iph;
567 }
568
569 if ((dst = tiph->daddr) == 0) {
570 /* NBMA tunnel */
571
572 if (skb->dst == NULL) {
573 tunnel->stat.tx_fifo_errors++;
574 goto tx_error;
575 }
576
577 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800578 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 if ((dst = rt->rt_gateway) == 0)
580 goto tx_error_icmp;
581 }
582#ifdef CONFIG_IPV6
583 else if (skb->protocol == htons(ETH_P_IPV6)) {
584 struct in6_addr *addr6;
585 int addr_type;
586 struct neighbour *neigh = skb->dst->neighbour;
587
588 if (neigh == NULL)
589 goto tx_error;
590
591 addr6 = (struct in6_addr*)&neigh->primary_key;
592 addr_type = ipv6_addr_type(addr6);
593
594 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700595 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 addr_type = ipv6_addr_type(addr6);
597 }
598
599 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
600 goto tx_error_icmp;
601
602 dst = addr6->s6_addr32[3];
603 }
604#endif
605 else
606 goto tx_error;
607 }
608
609 tos = tiph->tos;
610 if (tos&1) {
611 if (skb->protocol == htons(ETH_P_IP))
612 tos = old_iph->tos;
613 tos &= ~1;
614 }
615
616 {
617 struct flowi fl = { .oif = tunnel->parms.link,
618 .nl_u = { .ip4_u =
619 { .daddr = dst,
620 .saddr = tiph->saddr,
621 .tos = RT_TOS(tos) } },
622 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700623 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 tunnel->stat.tx_carrier_errors++;
625 goto tx_error;
626 }
627 }
628 tdev = rt->u.dst.dev;
629
630 if (tdev == dev) {
631 ip_rt_put(rt);
632 tunnel->stat.collisions++;
633 goto tx_error;
634 }
635
636 df = tiph->frag_off;
637 if (df)
638 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
639 else
640 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
641
642 if (skb->dst)
643 skb->dst->ops->update_pmtu(skb->dst, mtu);
644
645 if (skb->protocol == htons(ETH_P_IP)) {
646 df |= (old_iph->frag_off&htons(IP_DF));
647
648 if ((old_iph->frag_off&htons(IP_DF)) &&
649 mtu < ntohs(old_iph->tot_len)) {
650 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
651 ip_rt_put(rt);
652 goto tx_error;
653 }
654 }
655#ifdef CONFIG_IPV6
656 else if (skb->protocol == htons(ETH_P_IPV6)) {
657 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
658
659 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800660 if ((tunnel->parms.iph.daddr &&
661 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 rt6->rt6i_dst.plen == 128) {
663 rt6->rt6i_flags |= RTF_MODIFIED;
664 skb->dst->metrics[RTAX_MTU-1] = mtu;
665 }
666 }
667
668 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
669 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
670 ip_rt_put(rt);
671 goto tx_error;
672 }
673 }
674#endif
675
676 if (tunnel->err_count > 0) {
677 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
678 tunnel->err_count--;
679
680 dst_link_failure(skb);
681 } else
682 tunnel->err_count = 0;
683 }
684
685 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
686
Patrick McHardycfbba492007-07-09 15:33:40 -0700687 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
688 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
690 if (!new_skb) {
691 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900692 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 dev_kfree_skb(skb);
694 tunnel->recursion--;
695 return 0;
696 }
697 if (skb->sk)
698 skb_set_owner_w(new_skb, skb->sk);
699 dev_kfree_skb(skb);
700 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700701 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 }
703
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700704 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700705 skb_push(skb, gre_hlen);
706 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800708 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
709 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 dst_release(skb->dst);
711 skb->dst = &rt->u.dst;
712
713 /*
714 * Push down and install the IPIP header.
715 */
716
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700717 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 iph->version = 4;
719 iph->ihl = sizeof(struct iphdr) >> 2;
720 iph->frag_off = df;
721 iph->protocol = IPPROTO_GRE;
722 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
723 iph->daddr = rt->rt_dst;
724 iph->saddr = rt->rt_src;
725
726 if ((iph->ttl = tiph->ttl) == 0) {
727 if (skb->protocol == htons(ETH_P_IP))
728 iph->ttl = old_iph->ttl;
729#ifdef CONFIG_IPV6
730 else if (skb->protocol == htons(ETH_P_IPV6))
731 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
732#endif
733 else
734 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
735 }
736
Al Virod5a0a1e2006-11-08 00:23:14 -0800737 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
738 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
740 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800741 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
743 if (tunnel->parms.o_flags&GRE_SEQ) {
744 ++tunnel->o_seqno;
745 *ptr = htonl(tunnel->o_seqno);
746 ptr--;
747 }
748 if (tunnel->parms.o_flags&GRE_KEY) {
749 *ptr = tunnel->parms.o_key;
750 ptr--;
751 }
752 if (tunnel->parms.o_flags&GRE_CSUM) {
753 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800754 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 }
756 }
757
758 nf_reset(skb);
759
760 IPTUNNEL_XMIT();
761 tunnel->recursion--;
762 return 0;
763
764tx_error_icmp:
765 dst_link_failure(skb);
766
767tx_error:
768 stats->tx_errors++;
769 dev_kfree_skb(skb);
770 tunnel->recursion--;
771 return 0;
772}
773
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800774static void ipgre_tunnel_bind_dev(struct net_device *dev)
775{
776 struct net_device *tdev = NULL;
777 struct ip_tunnel *tunnel;
778 struct iphdr *iph;
779 int hlen = LL_MAX_HEADER;
780 int mtu = ETH_DATA_LEN;
781 int addend = sizeof(struct iphdr) + 4;
782
783 tunnel = netdev_priv(dev);
784 iph = &tunnel->parms.iph;
785
786 /* Guess output device to choose reasonable mtu and hard_header_len */
787
788 if (iph->daddr) {
789 struct flowi fl = { .oif = tunnel->parms.link,
790 .nl_u = { .ip4_u =
791 { .daddr = iph->daddr,
792 .saddr = iph->saddr,
793 .tos = RT_TOS(iph->tos) } },
794 .proto = IPPROTO_GRE };
795 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700796 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800797 tdev = rt->u.dst.dev;
798 ip_rt_put(rt);
799 }
800 dev->flags |= IFF_POINTOPOINT;
801 }
802
803 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700804 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800805
806 if (tdev) {
807 hlen = tdev->hard_header_len;
808 mtu = tdev->mtu;
809 }
810 dev->iflink = tunnel->parms.link;
811
812 /* Precalculate GRE options length */
813 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
814 if (tunnel->parms.o_flags&GRE_CSUM)
815 addend += 4;
816 if (tunnel->parms.o_flags&GRE_KEY)
817 addend += 4;
818 if (tunnel->parms.o_flags&GRE_SEQ)
819 addend += 4;
820 }
821 dev->hard_header_len = hlen + addend;
822 dev->mtu = mtu - addend;
823 tunnel->hlen = addend;
824
825}
826
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827static int
828ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
829{
830 int err = 0;
831 struct ip_tunnel_parm p;
832 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700833 struct net *net = dev_net(dev);
834 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835
836 switch (cmd) {
837 case SIOCGETTUNNEL:
838 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700839 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
841 err = -EFAULT;
842 break;
843 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700844 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 }
846 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800847 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 memcpy(&p, &t->parms, sizeof(p));
849 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
850 err = -EFAULT;
851 break;
852
853 case SIOCADDTUNNEL:
854 case SIOCCHGTUNNEL:
855 err = -EPERM;
856 if (!capable(CAP_NET_ADMIN))
857 goto done;
858
859 err = -EFAULT;
860 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
861 goto done;
862
863 err = -EINVAL;
864 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
865 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
866 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
867 goto done;
868 if (p.iph.ttl)
869 p.iph.frag_off |= htons(IP_DF);
870
871 if (!(p.i_flags&GRE_KEY))
872 p.i_key = 0;
873 if (!(p.o_flags&GRE_KEY))
874 p.o_key = 0;
875
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700876 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700878 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 if (t != NULL) {
880 if (t->dev != dev) {
881 err = -EEXIST;
882 break;
883 }
884 } else {
885 unsigned nflags=0;
886
Patrick McHardy2941a482006-01-08 22:05:26 -0800887 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888
Joe Perchesf97c1e02007-12-16 13:45:43 -0800889 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 nflags = IFF_BROADCAST;
891 else if (p.iph.daddr)
892 nflags = IFF_POINTOPOINT;
893
894 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
895 err = -EINVAL;
896 break;
897 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700898 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 t->parms.iph.saddr = p.iph.saddr;
900 t->parms.iph.daddr = p.iph.daddr;
901 t->parms.i_key = p.i_key;
902 t->parms.o_key = p.o_key;
903 memcpy(dev->dev_addr, &p.iph.saddr, 4);
904 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700905 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 netdev_state_change(dev);
907 }
908 }
909
910 if (t) {
911 err = 0;
912 if (cmd == SIOCCHGTUNNEL) {
913 t->parms.iph.ttl = p.iph.ttl;
914 t->parms.iph.tos = p.iph.tos;
915 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800916 if (t->parms.link != p.link) {
917 t->parms.link = p.link;
918 ipgre_tunnel_bind_dev(dev);
919 netdev_state_change(dev);
920 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 }
922 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
923 err = -EFAULT;
924 } else
925 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
926 break;
927
928 case SIOCDELTUNNEL:
929 err = -EPERM;
930 if (!capable(CAP_NET_ADMIN))
931 goto done;
932
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700933 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 err = -EFAULT;
935 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
936 goto done;
937 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700938 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 goto done;
940 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700941 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 goto done;
943 dev = t->dev;
944 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800945 unregister_netdevice(dev);
946 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 break;
948
949 default:
950 err = -EINVAL;
951 }
952
953done:
954 return err;
955}
956
957static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
958{
Patrick McHardy2941a482006-01-08 22:05:26 -0800959 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960}
961
962static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
963{
Patrick McHardy2941a482006-01-08 22:05:26 -0800964 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
966 return -EINVAL;
967 dev->mtu = new_mtu;
968 return 0;
969}
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971/* Nice toy. Unfortunately, useless in real life :-)
972 It allows to construct virtual multiprotocol broadcast "LAN"
973 over the Internet, provided multicast routing is tuned.
974
975
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
982 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
983
984 ping -t 255 224.66.66.66
985
986 If nobody answers, mbone does not work.
987
988 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
989 ip addr add 10.66.66.<somewhat>/24 dev Universe
990 ifconfig Universe up
991 ifconfig Universe add fe80::<Your_real_addr>/10
992 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
993 ftp 10.66.66.66
994 ...
995 ftp fec0:6666:6666::193.233.7.65
996 ...
997
998 */
999
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001000static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1001 unsigned short type,
1002 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003{
Patrick McHardy2941a482006-01-08 22:05:26 -08001004 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001006 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1009 p[0] = t->parms.o_flags;
1010 p[1] = htons(type);
1011
1012 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001013 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001015
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 if (saddr)
1017 memcpy(&iph->saddr, saddr, 4);
1018
1019 if (daddr) {
1020 memcpy(&iph->daddr, daddr, 4);
1021 return t->hlen;
1022 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001023 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 return -t->hlen;
1027}
1028
Timo Teras6a5f44d2007-10-23 20:31:53 -07001029static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1030{
1031 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1032 memcpy(haddr, &iph->saddr, 4);
1033 return 4;
1034}
1035
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001036static const struct header_ops ipgre_header_ops = {
1037 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001038 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001039};
1040
Timo Teras6a5f44d2007-10-23 20:31:53 -07001041#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042static int ipgre_open(struct net_device *dev)
1043{
Patrick McHardy2941a482006-01-08 22:05:26 -08001044 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045
Joe Perchesf97c1e02007-12-16 13:45:43 -08001046 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 struct flowi fl = { .oif = t->parms.link,
1048 .nl_u = { .ip4_u =
1049 { .daddr = t->parms.iph.daddr,
1050 .saddr = t->parms.iph.saddr,
1051 .tos = RT_TOS(t->parms.iph.tos) } },
1052 .proto = IPPROTO_GRE };
1053 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001054 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 return -EADDRNOTAVAIL;
1056 dev = rt->u.dst.dev;
1057 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001058 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059 return -EADDRNOTAVAIL;
1060 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001061 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 }
1063 return 0;
1064}
1065
1066static int ipgre_close(struct net_device *dev)
1067{
Patrick McHardy2941a482006-01-08 22:05:26 -08001068 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001069 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001070 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001071 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 if (in_dev) {
1073 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1074 in_dev_put(in_dev);
1075 }
1076 }
1077 return 0;
1078}
1079
1080#endif
1081
1082static void ipgre_tunnel_setup(struct net_device *dev)
1083{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 dev->uninit = ipgre_tunnel_uninit;
1085 dev->destructor = free_netdev;
1086 dev->hard_start_xmit = ipgre_tunnel_xmit;
1087 dev->get_stats = ipgre_tunnel_get_stats;
1088 dev->do_ioctl = ipgre_tunnel_ioctl;
1089 dev->change_mtu = ipgre_tunnel_change_mtu;
1090
1091 dev->type = ARPHRD_IPGRE;
1092 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001093 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094 dev->flags = IFF_NOARP;
1095 dev->iflink = 0;
1096 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001097 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098}
1099
1100static int ipgre_tunnel_init(struct net_device *dev)
1101{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 struct ip_tunnel *tunnel;
1103 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104
Patrick McHardy2941a482006-01-08 22:05:26 -08001105 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 iph = &tunnel->parms.iph;
1107
1108 tunnel->dev = dev;
1109 strcpy(tunnel->parms.name, dev->name);
1110
1111 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1112 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1113
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001114 ipgre_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115
1116 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001118 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 if (!iph->saddr)
1120 return -EINVAL;
1121 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001122 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 dev->open = ipgre_open;
1124 dev->stop = ipgre_close;
1125 }
1126#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001127 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001128 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 return 0;
1131}
1132
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001133static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134{
Patrick McHardy2941a482006-01-08 22:05:26 -08001135 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001137 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138
1139 tunnel->dev = dev;
1140 strcpy(tunnel->parms.name, dev->name);
1141
1142 iph->version = 4;
1143 iph->protocol = IPPROTO_GRE;
1144 iph->ihl = 5;
1145 tunnel->hlen = sizeof(struct iphdr) + 4;
1146
1147 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001148 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 return 0;
1150}
1151
1152
1153static struct net_protocol ipgre_protocol = {
1154 .handler = ipgre_rcv,
1155 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001156 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157};
1158
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001159static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1160{
1161 int prio;
1162
1163 for (prio = 0; prio < 4; prio++) {
1164 int h;
1165 for (h = 0; h < HASH_SIZE; h++) {
1166 struct ip_tunnel *t;
1167 while ((t = ign->tunnels[prio][h]) != NULL)
1168 unregister_netdevice(t->dev);
1169 }
1170 }
1171}
1172
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001173static int ipgre_init_net(struct net *net)
1174{
1175 int err;
1176 struct ipgre_net *ign;
1177
1178 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001179 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001180 if (ign == NULL)
1181 goto err_alloc;
1182
1183 err = net_assign_generic(net, ipgre_net_id, ign);
1184 if (err < 0)
1185 goto err_assign;
1186
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001187 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1188 ipgre_tunnel_setup);
1189 if (!ign->fb_tunnel_dev) {
1190 err = -ENOMEM;
1191 goto err_alloc_dev;
1192 }
1193
1194 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1195 dev_net_set(ign->fb_tunnel_dev, net);
1196
1197 if ((err = register_netdev(ign->fb_tunnel_dev)))
1198 goto err_reg_dev;
1199
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001200 return 0;
1201
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001202err_reg_dev:
1203 free_netdev(ign->fb_tunnel_dev);
1204err_alloc_dev:
1205 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001206err_assign:
1207 kfree(ign);
1208err_alloc:
1209 return err;
1210}
1211
1212static void ipgre_exit_net(struct net *net)
1213{
1214 struct ipgre_net *ign;
1215
1216 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001217 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001218 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001219 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001220 kfree(ign);
1221}
1222
1223static struct pernet_operations ipgre_net_ops = {
1224 .init = ipgre_init_net,
1225 .exit = ipgre_exit_net,
1226};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
1228/*
1229 * And now the modules code and kernel interface.
1230 */
1231
1232static int __init ipgre_init(void)
1233{
1234 int err;
1235
1236 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1237
1238 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1239 printk(KERN_INFO "ipgre init: can't add protocol\n");
1240 return -EAGAIN;
1241 }
1242
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001243 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1244 if (err < 0)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001245 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1246
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248}
1249
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001250static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251{
1252 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1253 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1254
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001255 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256}
1257
1258module_init(ipgre_init);
1259module_exit(ipgre_fini);
1260MODULE_LICENSE("GPL");