blob: 44ed9487fa15d15833c1610999a2665fb77f00ba [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like dev->tbusy flag, but I preferred a new variable, because
   the semantics is different. One day, when hard_start_xmit
   will be multithreaded we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
121
Herbert Xuc19e6542008-10-09 11:59:55 -0700122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700125static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
129static int ipgre_fb_tunnel_init(struct net_device *dev);
130
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700131#define HASH_SIZE 16
132
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133static int ipgre_net_id;
134struct ipgre_net {
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700135 struct ip_tunnel *tunnels[4][HASH_SIZE];
136
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700137 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700138};
139
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
 */
157
/*
 * Fold a 32-bit address or key into a bucket index.
 *
 * The argument is parenthesised so that the cast applies to the whole
 * expression even when a caller passes something other than a plain
 * identifier, and the mask is derived from HASH_SIZE (a power of two)
 * instead of repeating its value as a magic number.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Names for the four per-namespace tables, by which endpoints are fixed. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */

/* Taken for writing when a hash chain head is changed, for reading on lookup. */
static DEFINE_RWLOCK(ipgre_lock);
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700169static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172{
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
Herbert Xue1a80002008-10-09 12:00:17 -0700176 struct ip_tunnel *t2 = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
188 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
190 }
Herbert Xue1a80002008-10-09 12:00:17 -0700191
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 if (remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
199 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 }
Herbert Xue1a80002008-10-09 12:00:17 -0700202
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
Herbert Xue1a80002008-10-09 12:00:17 -0700207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
212 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 }
214 }
Herbert Xue1a80002008-10-09 12:00:17 -0700215
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Herbert Xue1a80002008-10-09 12:00:17 -0700217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
222 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 }
224
Herbert Xue1a80002008-10-09 12:00:17 -0700225 if (t2)
226 return t2;
227
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 return NULL;
231}
232
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700233static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800244 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700249 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700252static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900254{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700255 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900256}
257
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700258static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
266}
267
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700268static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
270 struct ip_tunnel **tp;
271
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
278 }
279 }
280}
281
Herbert Xue1a80002008-10-09 12:00:17 -0700282static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285{
Al Virod5a0a1e2006-11-08 00:23:14 -0800286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
Herbert Xue1a80002008-10-09 12:00:17 -0700289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300}
301
302static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304{
305 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Herbert Xue1a80002008-10-09 12:00:17 -0700310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800316 else
317 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700323 dev_net_set(dev, net);
324
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
Patrick McHardy2941a482006-01-08 22:05:26 -0800330 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700332 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
Herbert Xu42aa9162008-10-09 11:59:32 -0700334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800336 if (register_netdevice(dev) < 0)
337 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700340 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 return nt;
342
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800343failed_free:
344 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 return NULL;
346}
347
348static void ipgre_tunnel_uninit(struct net_device *dev)
349{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 dev_put(dev);
355}
356
357
358static void ipgre_err(struct sk_buff *skb, u32 info)
359{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
Rami Rosen071f92d2008-05-21 17:47:54 -0700361/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 8 bytes of packet payload. It means, that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
364
365 Moreover, Cisco "wise men" put GRE key to the third word
366 in GRE header. It makes impossible maintaining even soft state for keyed
367 GRE tunnels with enabled checksum. Tell them "thank you".
368
369 Well, I wonder, rfc1812 was written by Cisco employee,
370 what the hell these idiots break standrads established
371 by themself???
372 */
373
374 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800380 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
390 }
391 }
392
393 /* If only 8 bytes returned, keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
396
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
401
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
415 */
416 break;
417 }
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
423 }
424
425 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 goto out;
433
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
436
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442out:
443 read_unlock(&ipgre_lock);
444 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445}
446
447static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448{
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700451 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700453 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 }
455 }
456}
457
458static inline u8
459ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460{
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467}
468
469static int ipgre_rcv(struct sk_buff *skb)
470{
471 struct iphdr *iph;
472 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800473 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800474 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800475 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
478 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700479 __be16 gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480
481 if (!pskb_may_pull(skb, 16))
482 goto drop_nolock;
483
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700484 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800486 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487
488 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
489 /* - Version must be 0.
490 - We do not support routing headers.
491 */
492 if (flags&(GRE_VERSION|GRE_ROUTING))
493 goto drop_nolock;
494
495 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800496 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700497 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800498 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800499 if (!csum)
500 break;
501 /* fall through */
502 case CHECKSUM_NONE:
503 skb->csum = 0;
504 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700505 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 }
507 offset += 4;
508 }
509 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800510 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511 offset += 4;
512 }
513 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800514 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 offset += 4;
516 }
517 }
518
Herbert Xue1a80002008-10-09 12:00:17 -0700519 gre_proto = *(__be16 *)(h + 2);
520
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700522 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Herbert Xue1a80002008-10-09 12:00:17 -0700523 iph->saddr, iph->daddr, key,
524 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700525 struct net_device_stats *stats = &tunnel->dev->stats;
526
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 secpath_reset(skb);
528
Herbert Xue1a80002008-10-09 12:00:17 -0700529 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 /* WCCP version 1 and 2 protocol decoding.
531 * - Change protocol to IP
532 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
533 */
Herbert Xue1a80002008-10-09 12:00:17 -0700534 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700535 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900536 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 offset += 4;
538 }
539
Timo Teras1d069162007-12-20 00:10:33 -0800540 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300541 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700542 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 skb->pkt_type = PACKET_HOST;
544#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800545 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800547 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700549 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 skb->pkt_type = PACKET_BROADCAST;
551 }
552#endif
553
554 if (((flags&GRE_CSUM) && csum) ||
555 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700556 stats->rx_crc_errors++;
557 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 goto drop;
559 }
560 if (tunnel->parms.i_flags&GRE_SEQ) {
561 if (!(flags&GRE_SEQ) ||
562 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700563 stats->rx_fifo_errors++;
564 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 goto drop;
566 }
567 tunnel->i_seqno = seqno + 1;
568 }
Herbert Xue1a80002008-10-09 12:00:17 -0700569
570 /* Warning: All skb pointers will be invalidated! */
571 if (tunnel->dev->type == ARPHRD_ETHER) {
572 if (!pskb_may_pull(skb, ETH_HLEN)) {
573 stats->rx_length_errors++;
574 stats->rx_errors++;
575 goto drop;
576 }
577
578 iph = ip_hdr(skb);
579 skb->protocol = eth_type_trans(skb, tunnel->dev);
580 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
581 }
582
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700583 stats->rx_packets++;
584 stats->rx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 skb->dev = tunnel->dev;
586 dst_release(skb->dst);
587 skb->dst = NULL;
588 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700589
590 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700592
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 netif_rx(skb);
594 read_unlock(&ipgre_lock);
595 return(0);
596 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700597 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598
599drop:
600 read_unlock(&ipgre_lock);
601drop_nolock:
602 kfree_skb(skb);
603 return(0);
604}
605
606static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
607{
Patrick McHardy2941a482006-01-08 22:05:26 -0800608 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700609 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700610 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 struct iphdr *tiph;
612 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800613 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 struct rtable *rt; /* Route to the other host */
615 struct net_device *tdev; /* Device to other host */
616 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700617 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800619 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 int mtu;
621
622 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700623 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 goto tx_error;
625 }
626
Herbert Xue1a80002008-10-09 12:00:17 -0700627 if (dev->type == ARPHRD_ETHER)
628 IPCB(skb)->flags = 0;
629
630 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 gre_hlen = 0;
632 tiph = (struct iphdr*)skb->data;
633 } else {
634 gre_hlen = tunnel->hlen;
635 tiph = &tunnel->parms.iph;
636 }
637
638 if ((dst = tiph->daddr) == 0) {
639 /* NBMA tunnel */
640
641 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700642 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 goto tx_error;
644 }
645
646 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800647 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 if ((dst = rt->rt_gateway) == 0)
649 goto tx_error_icmp;
650 }
651#ifdef CONFIG_IPV6
652 else if (skb->protocol == htons(ETH_P_IPV6)) {
653 struct in6_addr *addr6;
654 int addr_type;
655 struct neighbour *neigh = skb->dst->neighbour;
656
657 if (neigh == NULL)
658 goto tx_error;
659
660 addr6 = (struct in6_addr*)&neigh->primary_key;
661 addr_type = ipv6_addr_type(addr6);
662
663 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700664 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 addr_type = ipv6_addr_type(addr6);
666 }
667
668 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
669 goto tx_error_icmp;
670
671 dst = addr6->s6_addr32[3];
672 }
673#endif
674 else
675 goto tx_error;
676 }
677
678 tos = tiph->tos;
679 if (tos&1) {
680 if (skb->protocol == htons(ETH_P_IP))
681 tos = old_iph->tos;
682 tos &= ~1;
683 }
684
685 {
686 struct flowi fl = { .oif = tunnel->parms.link,
687 .nl_u = { .ip4_u =
688 { .daddr = dst,
689 .saddr = tiph->saddr,
690 .tos = RT_TOS(tos) } },
691 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700692 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700693 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 goto tx_error;
695 }
696 }
697 tdev = rt->u.dst.dev;
698
699 if (tdev == dev) {
700 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700701 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 goto tx_error;
703 }
704
705 df = tiph->frag_off;
706 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700707 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 else
709 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
710
711 if (skb->dst)
712 skb->dst->ops->update_pmtu(skb->dst, mtu);
713
714 if (skb->protocol == htons(ETH_P_IP)) {
715 df |= (old_iph->frag_off&htons(IP_DF));
716
717 if ((old_iph->frag_off&htons(IP_DF)) &&
718 mtu < ntohs(old_iph->tot_len)) {
719 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
720 ip_rt_put(rt);
721 goto tx_error;
722 }
723 }
724#ifdef CONFIG_IPV6
725 else if (skb->protocol == htons(ETH_P_IPV6)) {
726 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
727
728 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800729 if ((tunnel->parms.iph.daddr &&
730 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 rt6->rt6i_dst.plen == 128) {
732 rt6->rt6i_flags |= RTF_MODIFIED;
733 skb->dst->metrics[RTAX_MTU-1] = mtu;
734 }
735 }
736
737 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
738 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
739 ip_rt_put(rt);
740 goto tx_error;
741 }
742 }
743#endif
744
745 if (tunnel->err_count > 0) {
746 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
747 tunnel->err_count--;
748
749 dst_link_failure(skb);
750 } else
751 tunnel->err_count = 0;
752 }
753
754 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
755
Patrick McHardycfbba492007-07-09 15:33:40 -0700756 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
757 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
759 if (!new_skb) {
760 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900761 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 dev_kfree_skb(skb);
763 tunnel->recursion--;
764 return 0;
765 }
766 if (skb->sk)
767 skb_set_owner_w(new_skb, skb->sk);
768 dev_kfree_skb(skb);
769 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700770 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 }
772
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700773 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700774 skb_push(skb, gre_hlen);
775 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800777 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
778 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 dst_release(skb->dst);
780 skb->dst = &rt->u.dst;
781
782 /*
783 * Push down and install the IPIP header.
784 */
785
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700786 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 iph->version = 4;
788 iph->ihl = sizeof(struct iphdr) >> 2;
789 iph->frag_off = df;
790 iph->protocol = IPPROTO_GRE;
791 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
792 iph->daddr = rt->rt_dst;
793 iph->saddr = rt->rt_src;
794
795 if ((iph->ttl = tiph->ttl) == 0) {
796 if (skb->protocol == htons(ETH_P_IP))
797 iph->ttl = old_iph->ttl;
798#ifdef CONFIG_IPV6
799 else if (skb->protocol == htons(ETH_P_IPV6))
800 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
801#endif
802 else
803 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
804 }
805
Herbert Xue1a80002008-10-09 12:00:17 -0700806 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
807 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
808 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809
810 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800811 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812
813 if (tunnel->parms.o_flags&GRE_SEQ) {
814 ++tunnel->o_seqno;
815 *ptr = htonl(tunnel->o_seqno);
816 ptr--;
817 }
818 if (tunnel->parms.o_flags&GRE_KEY) {
819 *ptr = tunnel->parms.o_key;
820 ptr--;
821 }
822 if (tunnel->parms.o_flags&GRE_CSUM) {
823 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800824 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 }
826 }
827
828 nf_reset(skb);
829
830 IPTUNNEL_XMIT();
831 tunnel->recursion--;
832 return 0;
833
834tx_error_icmp:
835 dst_link_failure(skb);
836
837tx_error:
838 stats->tx_errors++;
839 dev_kfree_skb(skb);
840 tunnel->recursion--;
841 return 0;
842}
843
Herbert Xu42aa9162008-10-09 11:59:32 -0700844static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800845{
846 struct net_device *tdev = NULL;
847 struct ip_tunnel *tunnel;
848 struct iphdr *iph;
849 int hlen = LL_MAX_HEADER;
850 int mtu = ETH_DATA_LEN;
851 int addend = sizeof(struct iphdr) + 4;
852
853 tunnel = netdev_priv(dev);
854 iph = &tunnel->parms.iph;
855
Herbert Xuc95b8192008-10-09 11:58:54 -0700856 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800857
858 if (iph->daddr) {
859 struct flowi fl = { .oif = tunnel->parms.link,
860 .nl_u = { .ip4_u =
861 { .daddr = iph->daddr,
862 .saddr = iph->saddr,
863 .tos = RT_TOS(iph->tos) } },
864 .proto = IPPROTO_GRE };
865 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700866 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800867 tdev = rt->u.dst.dev;
868 ip_rt_put(rt);
869 }
Herbert Xue1a80002008-10-09 12:00:17 -0700870
871 if (dev->type != ARPHRD_ETHER)
872 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800873 }
874
875 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700876 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800877
878 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700879 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800880 mtu = tdev->mtu;
881 }
882 dev->iflink = tunnel->parms.link;
883
884 /* Precalculate GRE options length */
885 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
886 if (tunnel->parms.o_flags&GRE_CSUM)
887 addend += 4;
888 if (tunnel->parms.o_flags&GRE_KEY)
889 addend += 4;
890 if (tunnel->parms.o_flags&GRE_SEQ)
891 addend += 4;
892 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700893 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700894 mtu -= dev->hard_header_len - addend;
895
896 if (mtu < 68)
897 mtu = 68;
898
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800899 tunnel->hlen = addend;
900
Herbert Xu42aa9162008-10-09 11:59:32 -0700901 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800902}
903
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904static int
905ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
906{
907 int err = 0;
908 struct ip_tunnel_parm p;
909 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700910 struct net *net = dev_net(dev);
911 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912
913 switch (cmd) {
914 case SIOCGETTUNNEL:
915 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700916 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
918 err = -EFAULT;
919 break;
920 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700921 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 }
923 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800924 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 memcpy(&p, &t->parms, sizeof(p));
926 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
927 err = -EFAULT;
928 break;
929
930 case SIOCADDTUNNEL:
931 case SIOCCHGTUNNEL:
932 err = -EPERM;
933 if (!capable(CAP_NET_ADMIN))
934 goto done;
935
936 err = -EFAULT;
937 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
938 goto done;
939
940 err = -EINVAL;
941 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
942 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
943 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
944 goto done;
945 if (p.iph.ttl)
946 p.iph.frag_off |= htons(IP_DF);
947
948 if (!(p.i_flags&GRE_KEY))
949 p.i_key = 0;
950 if (!(p.o_flags&GRE_KEY))
951 p.o_key = 0;
952
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700953 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700955 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 if (t != NULL) {
957 if (t->dev != dev) {
958 err = -EEXIST;
959 break;
960 }
961 } else {
962 unsigned nflags=0;
963
Patrick McHardy2941a482006-01-08 22:05:26 -0800964 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965
Joe Perchesf97c1e02007-12-16 13:45:43 -0800966 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 nflags = IFF_BROADCAST;
968 else if (p.iph.daddr)
969 nflags = IFF_POINTOPOINT;
970
971 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
972 err = -EINVAL;
973 break;
974 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700975 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 t->parms.iph.saddr = p.iph.saddr;
977 t->parms.iph.daddr = p.iph.daddr;
978 t->parms.i_key = p.i_key;
979 t->parms.o_key = p.o_key;
980 memcpy(dev->dev_addr, &p.iph.saddr, 4);
981 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700982 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 netdev_state_change(dev);
984 }
985 }
986
987 if (t) {
988 err = 0;
989 if (cmd == SIOCCHGTUNNEL) {
990 t->parms.iph.ttl = p.iph.ttl;
991 t->parms.iph.tos = p.iph.tos;
992 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800993 if (t->parms.link != p.link) {
994 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -0700995 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800996 netdev_state_change(dev);
997 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1000 err = -EFAULT;
1001 } else
1002 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1003 break;
1004
1005 case SIOCDELTUNNEL:
1006 err = -EPERM;
1007 if (!capable(CAP_NET_ADMIN))
1008 goto done;
1009
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001010 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 err = -EFAULT;
1012 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1013 goto done;
1014 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001015 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 goto done;
1017 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001018 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 goto done;
1020 dev = t->dev;
1021 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001022 unregister_netdevice(dev);
1023 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 break;
1025
1026 default:
1027 err = -EINVAL;
1028 }
1029
1030done:
1031 return err;
1032}
1033
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1035{
Patrick McHardy2941a482006-01-08 22:05:26 -08001036 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001037 if (new_mtu < 68 ||
1038 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 return -EINVAL;
1040 dev->mtu = new_mtu;
1041 return 0;
1042}
1043
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1072
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001073static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1074 unsigned short type,
1075 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076{
Patrick McHardy2941a482006-01-08 22:05:26 -08001077 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001079 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
1081 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1082 p[0] = t->parms.o_flags;
1083 p[1] = htons(type);
1084
1085 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001086 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 if (saddr)
1090 memcpy(&iph->saddr, saddr, 4);
1091
1092 if (daddr) {
1093 memcpy(&iph->daddr, daddr, 4);
1094 return t->hlen;
1095 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001096 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001098
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 return -t->hlen;
1100}
1101
Timo Teras6a5f44d2007-10-23 20:31:53 -07001102static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1103{
1104 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1105 memcpy(haddr, &iph->saddr, 4);
1106 return 4;
1107}
1108
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001109static const struct header_ops ipgre_header_ops = {
1110 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001111 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001112};
1113
Timo Teras6a5f44d2007-10-23 20:31:53 -07001114#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115static int ipgre_open(struct net_device *dev)
1116{
Patrick McHardy2941a482006-01-08 22:05:26 -08001117 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118
Joe Perchesf97c1e02007-12-16 13:45:43 -08001119 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 struct flowi fl = { .oif = t->parms.link,
1121 .nl_u = { .ip4_u =
1122 { .daddr = t->parms.iph.daddr,
1123 .saddr = t->parms.iph.saddr,
1124 .tos = RT_TOS(t->parms.iph.tos) } },
1125 .proto = IPPROTO_GRE };
1126 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001127 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 return -EADDRNOTAVAIL;
1129 dev = rt->u.dst.dev;
1130 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001131 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 return -EADDRNOTAVAIL;
1133 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001134 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 }
1136 return 0;
1137}
1138
1139static int ipgre_close(struct net_device *dev)
1140{
Patrick McHardy2941a482006-01-08 22:05:26 -08001141 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001142 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001143 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001144 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 if (in_dev) {
1146 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1147 in_dev_put(in_dev);
1148 }
1149 }
1150 return 0;
1151}
1152
1153#endif
1154
1155static void ipgre_tunnel_setup(struct net_device *dev)
1156{
Herbert Xuc19e6542008-10-09 11:59:55 -07001157 dev->init = ipgre_tunnel_init;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 dev->uninit = ipgre_tunnel_uninit;
1159 dev->destructor = free_netdev;
1160 dev->hard_start_xmit = ipgre_tunnel_xmit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 dev->do_ioctl = ipgre_tunnel_ioctl;
1162 dev->change_mtu = ipgre_tunnel_change_mtu;
1163
1164 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001165 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001166 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 dev->flags = IFF_NOARP;
1168 dev->iflink = 0;
1169 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001170 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171}
1172
1173static int ipgre_tunnel_init(struct net_device *dev)
1174{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 struct ip_tunnel *tunnel;
1176 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177
Patrick McHardy2941a482006-01-08 22:05:26 -08001178 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 iph = &tunnel->parms.iph;
1180
1181 tunnel->dev = dev;
1182 strcpy(tunnel->parms.name, dev->name);
1183
1184 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1185 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1186
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001189 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 if (!iph->saddr)
1191 return -EINVAL;
1192 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001193 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 dev->open = ipgre_open;
1195 dev->stop = ipgre_close;
1196 }
1197#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001198 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001199 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 return 0;
1202}
1203
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001204static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205{
Patrick McHardy2941a482006-01-08 22:05:26 -08001206 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001208 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209
1210 tunnel->dev = dev;
1211 strcpy(tunnel->parms.name, dev->name);
1212
1213 iph->version = 4;
1214 iph->protocol = IPPROTO_GRE;
1215 iph->ihl = 5;
1216 tunnel->hlen = sizeof(struct iphdr) + 4;
1217
1218 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001219 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 return 0;
1221}
1222
1223
1224static struct net_protocol ipgre_protocol = {
1225 .handler = ipgre_rcv,
1226 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001227 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228};
1229
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001230static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1231{
1232 int prio;
1233
1234 for (prio = 0; prio < 4; prio++) {
1235 int h;
1236 for (h = 0; h < HASH_SIZE; h++) {
1237 struct ip_tunnel *t;
1238 while ((t = ign->tunnels[prio][h]) != NULL)
1239 unregister_netdevice(t->dev);
1240 }
1241 }
1242}
1243
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001244static int ipgre_init_net(struct net *net)
1245{
1246 int err;
1247 struct ipgre_net *ign;
1248
1249 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001250 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001251 if (ign == NULL)
1252 goto err_alloc;
1253
1254 err = net_assign_generic(net, ipgre_net_id, ign);
1255 if (err < 0)
1256 goto err_assign;
1257
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001258 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1259 ipgre_tunnel_setup);
1260 if (!ign->fb_tunnel_dev) {
1261 err = -ENOMEM;
1262 goto err_alloc_dev;
1263 }
1264
1265 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1266 dev_net_set(ign->fb_tunnel_dev, net);
Herbert Xuc19e6542008-10-09 11:59:55 -07001267 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001268
1269 if ((err = register_netdev(ign->fb_tunnel_dev)))
1270 goto err_reg_dev;
1271
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001272 return 0;
1273
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001274err_reg_dev:
1275 free_netdev(ign->fb_tunnel_dev);
1276err_alloc_dev:
1277 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001278err_assign:
1279 kfree(ign);
1280err_alloc:
1281 return err;
1282}
1283
1284static void ipgre_exit_net(struct net *net)
1285{
1286 struct ipgre_net *ign;
1287
1288 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001289 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001290 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001291 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001292 kfree(ign);
1293}
1294
1295static struct pernet_operations ipgre_net_ops = {
1296 .init = ipgre_init_net,
1297 .exit = ipgre_exit_net,
1298};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
Herbert Xuc19e6542008-10-09 11:59:55 -07001300static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1301{
1302 __be16 flags;
1303
1304 if (!data)
1305 return 0;
1306
1307 flags = 0;
1308 if (data[IFLA_GRE_IFLAGS])
1309 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1310 if (data[IFLA_GRE_OFLAGS])
1311 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1312 if (flags & (GRE_VERSION|GRE_ROUTING))
1313 return -EINVAL;
1314
1315 return 0;
1316}
1317
Herbert Xue1a80002008-10-09 12:00:17 -07001318static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1319{
1320 __be32 daddr;
1321
1322 if (tb[IFLA_ADDRESS]) {
1323 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1324 return -EINVAL;
1325 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1326 return -EADDRNOTAVAIL;
1327 }
1328
1329 if (!data)
1330 goto out;
1331
1332 if (data[IFLA_GRE_REMOTE]) {
1333 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1334 if (!daddr)
1335 return -EINVAL;
1336 }
1337
1338out:
1339 return ipgre_tunnel_validate(tb, data);
1340}
1341
Herbert Xuc19e6542008-10-09 11:59:55 -07001342static void ipgre_netlink_parms(struct nlattr *data[],
1343 struct ip_tunnel_parm *parms)
1344{
1345 memset(parms, 0, sizeof(parms));
1346
1347 parms->iph.protocol = IPPROTO_GRE;
1348
1349 if (!data)
1350 return;
1351
1352 if (data[IFLA_GRE_LINK])
1353 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1354
1355 if (data[IFLA_GRE_IFLAGS])
1356 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1357
1358 if (data[IFLA_GRE_OFLAGS])
1359 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1360
1361 if (data[IFLA_GRE_IKEY])
1362 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1363
1364 if (data[IFLA_GRE_OKEY])
1365 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1366
1367 if (data[IFLA_GRE_LOCAL])
1368 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1369
1370 if (data[IFLA_GRE_REMOTE])
1371 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1372
1373 if (data[IFLA_GRE_TTL])
1374 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1375
1376 if (data[IFLA_GRE_TOS])
1377 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1378
1379 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1380 parms->iph.frag_off = htons(IP_DF);
1381}
1382
Herbert Xue1a80002008-10-09 12:00:17 -07001383static int ipgre_tap_init(struct net_device *dev)
1384{
1385 struct ip_tunnel *tunnel;
1386
1387 tunnel = netdev_priv(dev);
1388
1389 tunnel->dev = dev;
1390 strcpy(tunnel->parms.name, dev->name);
1391
1392 ipgre_tunnel_bind_dev(dev);
1393
1394 return 0;
1395}
1396
1397static void ipgre_tap_setup(struct net_device *dev)
1398{
1399
1400 ether_setup(dev);
1401
1402 dev->init = ipgre_tap_init;
1403 dev->uninit = ipgre_tunnel_uninit;
1404 dev->destructor = free_netdev;
1405 dev->hard_start_xmit = ipgre_tunnel_xmit;
1406 dev->change_mtu = ipgre_tunnel_change_mtu;
1407
1408 dev->iflink = 0;
1409 dev->features |= NETIF_F_NETNS_LOCAL;
1410}
1411
Herbert Xuc19e6542008-10-09 11:59:55 -07001412static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1413 struct nlattr *data[])
1414{
1415 struct ip_tunnel *nt;
1416 struct net *net = dev_net(dev);
1417 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1418 int mtu;
1419 int err;
1420
1421 nt = netdev_priv(dev);
1422 ipgre_netlink_parms(data, &nt->parms);
1423
Herbert Xue1a80002008-10-09 12:00:17 -07001424 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001425 return -EEXIST;
1426
Herbert Xue1a80002008-10-09 12:00:17 -07001427 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1428 random_ether_addr(dev->dev_addr);
1429
Herbert Xuc19e6542008-10-09 11:59:55 -07001430 mtu = ipgre_tunnel_bind_dev(dev);
1431 if (!tb[IFLA_MTU])
1432 dev->mtu = mtu;
1433
1434 err = register_netdevice(dev);
1435 if (err)
1436 goto out;
1437
1438 dev_hold(dev);
1439 ipgre_tunnel_link(ign, nt);
1440
1441out:
1442 return err;
1443}
1444
1445static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1446 struct nlattr *data[])
1447{
1448 struct ip_tunnel *t, *nt;
1449 struct net *net = dev_net(dev);
1450 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1451 struct ip_tunnel_parm p;
1452 int mtu;
1453
1454 if (dev == ign->fb_tunnel_dev)
1455 return -EINVAL;
1456
1457 nt = netdev_priv(dev);
1458 ipgre_netlink_parms(data, &p);
1459
1460 t = ipgre_tunnel_locate(net, &p, 0);
1461
1462 if (t) {
1463 if (t->dev != dev)
1464 return -EEXIST;
1465 } else {
1466 unsigned nflags = 0;
1467
1468 t = nt;
1469
1470 if (ipv4_is_multicast(p.iph.daddr))
1471 nflags = IFF_BROADCAST;
1472 else if (p.iph.daddr)
1473 nflags = IFF_POINTOPOINT;
1474
1475 if ((dev->flags ^ nflags) &
1476 (IFF_POINTOPOINT | IFF_BROADCAST))
1477 return -EINVAL;
1478
1479 ipgre_tunnel_unlink(ign, t);
1480 t->parms.iph.saddr = p.iph.saddr;
1481 t->parms.iph.daddr = p.iph.daddr;
1482 t->parms.i_key = p.i_key;
1483 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1484 memcpy(dev->broadcast, &p.iph.daddr, 4);
1485 ipgre_tunnel_link(ign, t);
1486 netdev_state_change(dev);
1487 }
1488
1489 t->parms.o_key = p.o_key;
1490 t->parms.iph.ttl = p.iph.ttl;
1491 t->parms.iph.tos = p.iph.tos;
1492 t->parms.iph.frag_off = p.iph.frag_off;
1493
1494 if (t->parms.link != p.link) {
1495 t->parms.link = p.link;
1496 mtu = ipgre_tunnel_bind_dev(dev);
1497 if (!tb[IFLA_MTU])
1498 dev->mtu = mtu;
1499 netdev_state_change(dev);
1500 }
1501
1502 return 0;
1503}
1504
1505static size_t ipgre_get_size(const struct net_device *dev)
1506{
1507 return
1508 /* IFLA_GRE_LINK */
1509 nla_total_size(4) +
1510 /* IFLA_GRE_IFLAGS */
1511 nla_total_size(2) +
1512 /* IFLA_GRE_OFLAGS */
1513 nla_total_size(2) +
1514 /* IFLA_GRE_IKEY */
1515 nla_total_size(4) +
1516 /* IFLA_GRE_OKEY */
1517 nla_total_size(4) +
1518 /* IFLA_GRE_LOCAL */
1519 nla_total_size(4) +
1520 /* IFLA_GRE_REMOTE */
1521 nla_total_size(4) +
1522 /* IFLA_GRE_TTL */
1523 nla_total_size(1) +
1524 /* IFLA_GRE_TOS */
1525 nla_total_size(1) +
1526 /* IFLA_GRE_PMTUDISC */
1527 nla_total_size(1) +
1528 0;
1529}
1530
1531static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1532{
1533 struct ip_tunnel *t = netdev_priv(dev);
1534 struct ip_tunnel_parm *p = &t->parms;
1535
1536 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1537 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1538 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1539 NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1540 NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1541 NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1542 NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1543 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1544 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1545 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1546
1547 return 0;
1548
1549nla_put_failure:
1550 return -EMSGSIZE;
1551}
1552
1553static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1554 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1555 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1556 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1557 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1558 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1559 [IFLA_GRE_LOCAL] = { .len = 4 },
1560 [IFLA_GRE_REMOTE] = { .len = 4 },
1561 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1562 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1563 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1564};
1565
1566static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1567 .kind = "gre",
1568 .maxtype = IFLA_GRE_MAX,
1569 .policy = ipgre_policy,
1570 .priv_size = sizeof(struct ip_tunnel),
1571 .setup = ipgre_tunnel_setup,
1572 .validate = ipgre_tunnel_validate,
1573 .newlink = ipgre_newlink,
1574 .changelink = ipgre_changelink,
1575 .get_size = ipgre_get_size,
1576 .fill_info = ipgre_fill_info,
1577};
1578
Herbert Xue1a80002008-10-09 12:00:17 -07001579static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1580 .kind = "gretap",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tap_setup,
1585 .validate = ipgre_tap_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1590};
1591
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592/*
1593 * And now the modules code and kernel interface.
1594 */
1595
1596static int __init ipgre_init(void)
1597{
1598 int err;
1599
1600 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1601
1602 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1603 printk(KERN_INFO "ipgre init: can't add protocol\n");
1604 return -EAGAIN;
1605 }
1606
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001607 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1608 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001609 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001610
Herbert Xuc19e6542008-10-09 11:59:55 -07001611 err = rtnl_link_register(&ipgre_link_ops);
1612 if (err < 0)
1613 goto rtnl_link_failed;
1614
Herbert Xue1a80002008-10-09 12:00:17 -07001615 err = rtnl_link_register(&ipgre_tap_ops);
1616 if (err < 0)
1617 goto tap_ops_failed;
1618
Herbert Xuc19e6542008-10-09 11:59:55 -07001619out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001621
Herbert Xue1a80002008-10-09 12:00:17 -07001622tap_ops_failed:
1623 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001624rtnl_link_failed:
1625 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1626gen_device_failed:
1627 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1628 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629}
1630
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001631static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632{
Herbert Xue1a80002008-10-09 12:00:17 -07001633 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001634 rtnl_link_unregister(&ipgre_link_ops);
1635 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1637 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638}
1639
1640module_init(ipgre_init);
1641module_exit(ipgre_fini);
1642MODULE_LICENSE("GPL");
Herbert Xuc19e6542008-10-09 11:59:55 -07001643MODULE_ALIAS("rtnl-link-gre");
Herbert Xue1a80002008-10-09 12:00:17 -07001644MODULE_ALIAS("rtnl-link-gretap");