blob: 4a43739c9035df34a6ec0344662a2d8dab9c6325 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090069 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
Herbert Xuc19e6542008-10-09 11:59:55 -0700122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700125static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700129#define HASH_SIZE 16
130
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700131static int ipgre_net_id;
132struct ipgre_net {
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700133 struct ip_tunnel *tunnels[4][HASH_SIZE];
134
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700135 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700136};
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138/* Tunnel hash table */
139
140/*
141 4 hash tables:
142
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
147
148 We require exact key match i.e. if a key is present in packet
149 it will match only tunnel with the same key; if it is not present,
150 it will match only keyless tunnel.
151
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
154 */
155
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is parenthesized so that any expression may be passed,
 * and the mask is derived from HASH_SIZE (which must stay a power of two)
 * instead of repeating the table size as a literal.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Named views of the four tables in struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */

/*
 * Taken for writing while a hash chain head/link is updated and for
 * reading around ipgre_tunnel_lookup() in the receive and error paths.
 */
static DEFINE_RWLOCK(ipgre_lock);
164
165/* Given src, dst and key, find appropriate for input tunnel. */
166
Timo Teras749c10f2009-01-19 17:22:12 -0800167static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170{
Timo Teras749c10f2009-01-19 17:22:12 -0800171 struct net *net = dev_net(dev);
172 int link = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
Timo Teras749c10f2009-01-19 17:22:12 -0800175 struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL };
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700176 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700177 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
178 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Teras749c10f2009-01-19 17:22:12 -0800179 int idx;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800182 if (local != t->parms.iph.saddr ||
183 remote != t->parms.iph.daddr ||
184 key != t->parms.i_key ||
185 !(t->dev->flags & IFF_UP))
186 continue;
187
188 if (t->dev->type != ARPHRD_IPGRE &&
189 t->dev->type != dev_type)
190 continue;
191
192 idx = 0;
193 if (t->parms.link != link)
194 idx |= 1;
195 if (t->dev->type != dev_type)
196 idx |= 2;
197 if (idx == 0)
198 return t;
199 if (sel[idx] == NULL)
200 sel[idx] = t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 }
Herbert Xue1a80002008-10-09 12:00:17 -0700202
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
208
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
212
213 idx = 0;
214 if (t->parms.link != link)
215 idx |= 1;
216 if (t->dev->type != dev_type)
217 idx |= 2;
218 if (idx == 0)
219 return t;
220 if (sel[idx] == NULL)
221 sel[idx] = t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 }
Herbert Xue1a80002008-10-09 12:00:17 -0700223
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700224 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800225 if ((local != t->parms.iph.saddr &&
226 (local != t->parms.iph.daddr ||
227 !ipv4_is_multicast(local))) ||
228 key != t->parms.i_key ||
229 !(t->dev->flags & IFF_UP))
230 continue;
231
232 if (t->dev->type != ARPHRD_IPGRE &&
233 t->dev->type != dev_type)
234 continue;
235
236 idx = 0;
237 if (t->parms.link != link)
238 idx |= 1;
239 if (t->dev->type != dev_type)
240 idx |= 2;
241 if (idx == 0)
242 return t;
243 if (sel[idx] == NULL)
244 sel[idx] = t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 }
Herbert Xue1a80002008-10-09 12:00:17 -0700246
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700247 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800248 if (t->parms.i_key != key ||
249 !(t->dev->flags & IFF_UP))
250 continue;
251
252 if (t->dev->type != ARPHRD_IPGRE &&
253 t->dev->type != dev_type)
254 continue;
255
256 idx = 0;
257 if (t->parms.link != link)
258 idx |= 1;
259 if (t->dev->type != dev_type)
260 idx |= 2;
261 if (idx == 0)
262 return t;
263 if (sel[idx] == NULL)
264 sel[idx] = t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 }
266
Timo Teras749c10f2009-01-19 17:22:12 -0800267 for (idx = 1; idx < ARRAY_SIZE(sel); idx++)
268 if (sel[idx] != NULL)
269 return sel[idx];
Herbert Xue1a80002008-10-09 12:00:17 -0700270
Timo Teras749c10f2009-01-19 17:22:12 -0800271 if (ign->fb_tunnel_dev->flags & IFF_UP)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700272 return netdev_priv(ign->fb_tunnel_dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800273
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 return NULL;
275}
276
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700277static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
278 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900280 __be32 remote = parms->iph.daddr;
281 __be32 local = parms->iph.saddr;
282 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 unsigned h = HASH(key);
284 int prio = 0;
285
286 if (local)
287 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800288 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 prio |= 2;
290 h ^= HASH(remote);
291 }
292
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700293 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294}
295
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700296static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
297 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900298{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700299 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900300}
301
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700302static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700304 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
306 t->next = *tp;
307 write_lock_bh(&ipgre_lock);
308 *tp = t;
309 write_unlock_bh(&ipgre_lock);
310}
311
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700312static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313{
314 struct ip_tunnel **tp;
315
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700316 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 if (t == *tp) {
318 write_lock_bh(&ipgre_lock);
319 *tp = t->next;
320 write_unlock_bh(&ipgre_lock);
321 break;
322 }
323 }
324}
325
Herbert Xue1a80002008-10-09 12:00:17 -0700326static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
327 struct ip_tunnel_parm *parms,
328 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329{
Al Virod5a0a1e2006-11-08 00:23:14 -0800330 __be32 remote = parms->iph.daddr;
331 __be32 local = parms->iph.saddr;
332 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800333 int link = parms->link;
Herbert Xue1a80002008-10-09 12:00:17 -0700334 struct ip_tunnel *t, **tp;
335 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
336
337 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
338 if (local == t->parms.iph.saddr &&
339 remote == t->parms.iph.daddr &&
340 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800341 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700342 type == t->dev->type)
343 break;
344
345 return t;
346}
347
348static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
349 struct ip_tunnel_parm *parms, int create)
350{
351 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700354 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355
Herbert Xue1a80002008-10-09 12:00:17 -0700356 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
357 if (t || !create)
358 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359
360 if (parms->name[0])
361 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800362 else
363 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364
365 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
366 if (!dev)
367 return NULL;
368
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700369 dev_net_set(dev, net);
370
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800371 if (strchr(name, '%')) {
372 if (dev_alloc_name(dev, name) < 0)
373 goto failed_free;
374 }
375
Patrick McHardy2941a482006-01-08 22:05:26 -0800376 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700378 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379
Herbert Xu42aa9162008-10-09 11:59:32 -0700380 dev->mtu = ipgre_tunnel_bind_dev(dev);
381
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800382 if (register_netdevice(dev) < 0)
383 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700386 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 return nt;
388
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800389failed_free:
390 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 return NULL;
392}
393
394static void ipgre_tunnel_uninit(struct net_device *dev)
395{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700396 struct net *net = dev_net(dev);
397 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
398
399 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 dev_put(dev);
401}
402
403
404static void ipgre_err(struct sk_buff *skb, u32 info)
405{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
Rami Rosen071f92d2008-05-21 17:47:54 -0700407/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 8 bytes of packet payload. It means, that precise relaying of
409 ICMP in the real Internet is absolutely infeasible.
410
411 Moreover, Cisco "wise men" put GRE key to the third word
412 in GRE header. It makes impossible maintaining even soft state for keyed
413 GRE tunnels with enabled checksum. Tell them "thank you".
414
415 Well, I wonder, rfc1812 was written by Cisco employee,
416 what the hell these idiots break standrads established
417 by themself???
418 */
419
Jianjun Kong6ed25332008-11-03 00:25:16 -0800420 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800421 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300423 const int type = icmp_hdr(skb)->type;
424 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800426 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
428 flags = p[0];
429 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
430 if (flags&(GRE_VERSION|GRE_ROUTING))
431 return;
432 if (flags&GRE_KEY) {
433 grehlen += 4;
434 if (flags&GRE_CSUM)
435 grehlen += 4;
436 }
437 }
438
439 /* If only 8 bytes returned, keyed message will be dropped here */
440 if (skb_headlen(skb) < grehlen)
441 return;
442
443 switch (type) {
444 default:
445 case ICMP_PARAMETERPROB:
446 return;
447
448 case ICMP_DEST_UNREACH:
449 switch (code) {
450 case ICMP_SR_FAILED:
451 case ICMP_PORT_UNREACH:
452 /* Impossible event. */
453 return;
454 case ICMP_FRAG_NEEDED:
455 /* Soft state for pmtu is maintained by IP core. */
456 return;
457 default:
458 /* All others are translated to HOST_UNREACH.
459 rfc2003 contains "deep thoughts" about NET_UNREACH,
460 I believe they are just ether pollution. --ANK
461 */
462 break;
463 }
464 break;
465 case ICMP_TIME_EXCEEDED:
466 if (code != ICMP_EXC_TTL)
467 return;
468 break;
469 }
470
471 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800472 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700473 flags & GRE_KEY ?
474 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
475 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800476 if (t == NULL || t->parms.iph.daddr == 0 ||
477 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478 goto out;
479
480 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
481 goto out;
482
483 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
484 t->err_count++;
485 else
486 t->err_count = 1;
487 t->err_time = jiffies;
488out:
489 read_unlock(&ipgre_lock);
490 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491}
492
493static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
494{
495 if (INET_ECN_is_ce(iph->tos)) {
496 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700497 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700499 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 }
501 }
502}
503
504static inline u8
505ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
506{
507 u8 inner = 0;
508 if (skb->protocol == htons(ETH_P_IP))
509 inner = old_iph->tos;
510 else if (skb->protocol == htons(ETH_P_IPV6))
511 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
512 return INET_ECN_encapsulate(tos, inner);
513}
514
515static int ipgre_rcv(struct sk_buff *skb)
516{
517 struct iphdr *iph;
518 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800519 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800520 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800521 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 u32 seqno = 0;
523 struct ip_tunnel *tunnel;
524 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700525 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700526 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
528 if (!pskb_may_pull(skb, 16))
529 goto drop_nolock;
530
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700531 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800533 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534
535 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
536 /* - Version must be 0.
537 - We do not support routing headers.
538 */
539 if (flags&(GRE_VERSION|GRE_ROUTING))
540 goto drop_nolock;
541
542 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800543 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700544 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800545 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800546 if (!csum)
547 break;
548 /* fall through */
549 case CHECKSUM_NONE:
550 skb->csum = 0;
551 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700552 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 }
554 offset += 4;
555 }
556 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800557 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 offset += 4;
559 }
560 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800561 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 offset += 4;
563 }
564 }
565
Herbert Xue1a80002008-10-09 12:00:17 -0700566 gre_proto = *(__be16 *)(h + 2);
567
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800569 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700570 iph->saddr, iph->daddr, key,
571 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700572 struct net_device_stats *stats = &tunnel->dev->stats;
573
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 secpath_reset(skb);
575
Herbert Xue1a80002008-10-09 12:00:17 -0700576 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 /* WCCP version 1 and 2 protocol decoding.
578 * - Change protocol to IP
579 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
580 */
Herbert Xue1a80002008-10-09 12:00:17 -0700581 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700582 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900583 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 offset += 4;
585 }
586
Timo Teras1d069162007-12-20 00:10:33 -0800587 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300588 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700589 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 skb->pkt_type = PACKET_HOST;
591#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800592 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800594 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700596 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 skb->pkt_type = PACKET_BROADCAST;
598 }
599#endif
600
601 if (((flags&GRE_CSUM) && csum) ||
602 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700603 stats->rx_crc_errors++;
604 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 goto drop;
606 }
607 if (tunnel->parms.i_flags&GRE_SEQ) {
608 if (!(flags&GRE_SEQ) ||
609 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700610 stats->rx_fifo_errors++;
611 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 goto drop;
613 }
614 tunnel->i_seqno = seqno + 1;
615 }
Herbert Xue1a80002008-10-09 12:00:17 -0700616
Herbert Xu64194c32008-10-09 12:03:17 -0700617 len = skb->len;
618
Herbert Xue1a80002008-10-09 12:00:17 -0700619 /* Warning: All skb pointers will be invalidated! */
620 if (tunnel->dev->type == ARPHRD_ETHER) {
621 if (!pskb_may_pull(skb, ETH_HLEN)) {
622 stats->rx_length_errors++;
623 stats->rx_errors++;
624 goto drop;
625 }
626
627 iph = ip_hdr(skb);
628 skb->protocol = eth_type_trans(skb, tunnel->dev);
629 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
630 }
631
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700632 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700633 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 skb->dev = tunnel->dev;
635 dst_release(skb->dst);
636 skb->dst = NULL;
637 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700638
639 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700641
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 netif_rx(skb);
643 read_unlock(&ipgre_lock);
644 return(0);
645 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700646 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647
648drop:
649 read_unlock(&ipgre_lock);
650drop_nolock:
651 kfree_skb(skb);
652 return(0);
653}
654
655static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
656{
Patrick McHardy2941a482006-01-08 22:05:26 -0800657 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700658 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700659 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 struct iphdr *tiph;
661 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800662 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 struct rtable *rt; /* Route to the other host */
664 struct net_device *tdev; /* Device to other host */
665 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700666 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800668 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 int mtu;
670
671 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700672 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 goto tx_error;
674 }
675
Herbert Xue1a80002008-10-09 12:00:17 -0700676 if (dev->type == ARPHRD_ETHER)
677 IPCB(skb)->flags = 0;
678
679 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 gre_hlen = 0;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800681 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 } else {
683 gre_hlen = tunnel->hlen;
684 tiph = &tunnel->parms.iph;
685 }
686
687 if ((dst = tiph->daddr) == 0) {
688 /* NBMA tunnel */
689
690 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700691 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 goto tx_error;
693 }
694
695 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800696 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if ((dst = rt->rt_gateway) == 0)
698 goto tx_error_icmp;
699 }
700#ifdef CONFIG_IPV6
701 else if (skb->protocol == htons(ETH_P_IPV6)) {
702 struct in6_addr *addr6;
703 int addr_type;
704 struct neighbour *neigh = skb->dst->neighbour;
705
706 if (neigh == NULL)
707 goto tx_error;
708
Jianjun Kong6ed25332008-11-03 00:25:16 -0800709 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 addr_type = ipv6_addr_type(addr6);
711
712 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700713 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 addr_type = ipv6_addr_type(addr6);
715 }
716
717 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
718 goto tx_error_icmp;
719
720 dst = addr6->s6_addr32[3];
721 }
722#endif
723 else
724 goto tx_error;
725 }
726
727 tos = tiph->tos;
728 if (tos&1) {
729 if (skb->protocol == htons(ETH_P_IP))
730 tos = old_iph->tos;
731 tos &= ~1;
732 }
733
734 {
735 struct flowi fl = { .oif = tunnel->parms.link,
736 .nl_u = { .ip4_u =
737 { .daddr = dst,
738 .saddr = tiph->saddr,
739 .tos = RT_TOS(tos) } },
740 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700741 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700742 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 goto tx_error;
744 }
745 }
746 tdev = rt->u.dst.dev;
747
748 if (tdev == dev) {
749 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700750 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 goto tx_error;
752 }
753
754 df = tiph->frag_off;
755 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700756 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 else
758 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
759
760 if (skb->dst)
761 skb->dst->ops->update_pmtu(skb->dst, mtu);
762
763 if (skb->protocol == htons(ETH_P_IP)) {
764 df |= (old_iph->frag_off&htons(IP_DF));
765
766 if ((old_iph->frag_off&htons(IP_DF)) &&
767 mtu < ntohs(old_iph->tot_len)) {
768 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
769 ip_rt_put(rt);
770 goto tx_error;
771 }
772 }
773#ifdef CONFIG_IPV6
774 else if (skb->protocol == htons(ETH_P_IPV6)) {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800775 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776
777 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800778 if ((tunnel->parms.iph.daddr &&
779 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 rt6->rt6i_dst.plen == 128) {
781 rt6->rt6i_flags |= RTF_MODIFIED;
782 skb->dst->metrics[RTAX_MTU-1] = mtu;
783 }
784 }
785
786 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
787 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
788 ip_rt_put(rt);
789 goto tx_error;
790 }
791 }
792#endif
793
794 if (tunnel->err_count > 0) {
795 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
796 tunnel->err_count--;
797
798 dst_link_failure(skb);
799 } else
800 tunnel->err_count = 0;
801 }
802
803 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
804
Patrick McHardycfbba492007-07-09 15:33:40 -0700805 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
806 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
808 if (!new_skb) {
809 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900810 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 dev_kfree_skb(skb);
812 tunnel->recursion--;
813 return 0;
814 }
815 if (skb->sk)
816 skb_set_owner_w(new_skb, skb->sk);
817 dev_kfree_skb(skb);
818 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700819 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 }
821
Herbert Xu64194c32008-10-09 12:03:17 -0700822 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700823 skb_push(skb, gre_hlen);
824 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800826 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
827 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 dst_release(skb->dst);
829 skb->dst = &rt->u.dst;
830
831 /*
832 * Push down and install the IPIP header.
833 */
834
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700835 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 iph->version = 4;
837 iph->ihl = sizeof(struct iphdr) >> 2;
838 iph->frag_off = df;
839 iph->protocol = IPPROTO_GRE;
840 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
841 iph->daddr = rt->rt_dst;
842 iph->saddr = rt->rt_src;
843
844 if ((iph->ttl = tiph->ttl) == 0) {
845 if (skb->protocol == htons(ETH_P_IP))
846 iph->ttl = old_iph->ttl;
847#ifdef CONFIG_IPV6
848 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed25332008-11-03 00:25:16 -0800849 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850#endif
851 else
852 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
853 }
854
Herbert Xue1a80002008-10-09 12:00:17 -0700855 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
856 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
857 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858
859 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800860 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861
862 if (tunnel->parms.o_flags&GRE_SEQ) {
863 ++tunnel->o_seqno;
864 *ptr = htonl(tunnel->o_seqno);
865 ptr--;
866 }
867 if (tunnel->parms.o_flags&GRE_KEY) {
868 *ptr = tunnel->parms.o_key;
869 ptr--;
870 }
871 if (tunnel->parms.o_flags&GRE_CSUM) {
872 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800873 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 }
875 }
876
877 nf_reset(skb);
878
879 IPTUNNEL_XMIT();
880 tunnel->recursion--;
881 return 0;
882
883tx_error_icmp:
884 dst_link_failure(skb);
885
886tx_error:
887 stats->tx_errors++;
888 dev_kfree_skb(skb);
889 tunnel->recursion--;
890 return 0;
891}
892
Herbert Xu42aa9162008-10-09 11:59:32 -0700893static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800894{
895 struct net_device *tdev = NULL;
896 struct ip_tunnel *tunnel;
897 struct iphdr *iph;
898 int hlen = LL_MAX_HEADER;
899 int mtu = ETH_DATA_LEN;
900 int addend = sizeof(struct iphdr) + 4;
901
902 tunnel = netdev_priv(dev);
903 iph = &tunnel->parms.iph;
904
Herbert Xuc95b8192008-10-09 11:58:54 -0700905 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800906
907 if (iph->daddr) {
908 struct flowi fl = { .oif = tunnel->parms.link,
909 .nl_u = { .ip4_u =
910 { .daddr = iph->daddr,
911 .saddr = iph->saddr,
912 .tos = RT_TOS(iph->tos) } },
913 .proto = IPPROTO_GRE };
914 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700915 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800916 tdev = rt->u.dst.dev;
917 ip_rt_put(rt);
918 }
Herbert Xue1a80002008-10-09 12:00:17 -0700919
920 if (dev->type != ARPHRD_ETHER)
921 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800922 }
923
924 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700925 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800926
927 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700928 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800929 mtu = tdev->mtu;
930 }
931 dev->iflink = tunnel->parms.link;
932
933 /* Precalculate GRE options length */
934 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935 if (tunnel->parms.o_flags&GRE_CSUM)
936 addend += 4;
937 if (tunnel->parms.o_flags&GRE_KEY)
938 addend += 4;
939 if (tunnel->parms.o_flags&GRE_SEQ)
940 addend += 4;
941 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700942 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700943 mtu -= dev->hard_header_len - addend;
944
945 if (mtu < 68)
946 mtu = 68;
947
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800948 tunnel->hlen = addend;
949
Herbert Xu42aa9162008-10-09 11:59:32 -0700950 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800951}
952
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953static int
954ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
955{
956 int err = 0;
957 struct ip_tunnel_parm p;
958 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700959 struct net *net = dev_net(dev);
960 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962 switch (cmd) {
963 case SIOCGETTUNNEL:
964 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700965 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967 err = -EFAULT;
968 break;
969 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700970 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 }
972 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800973 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 memcpy(&p, &t->parms, sizeof(p));
975 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976 err = -EFAULT;
977 break;
978
979 case SIOCADDTUNNEL:
980 case SIOCCHGTUNNEL:
981 err = -EPERM;
982 if (!capable(CAP_NET_ADMIN))
983 goto done;
984
985 err = -EFAULT;
986 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987 goto done;
988
989 err = -EINVAL;
990 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993 goto done;
994 if (p.iph.ttl)
995 p.iph.frag_off |= htons(IP_DF);
996
997 if (!(p.i_flags&GRE_KEY))
998 p.i_key = 0;
999 if (!(p.o_flags&GRE_KEY))
1000 p.o_key = 0;
1001
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001002 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001004 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 if (t != NULL) {
1006 if (t->dev != dev) {
1007 err = -EEXIST;
1008 break;
1009 }
1010 } else {
Jianjun Kong6ed25332008-11-03 00:25:16 -08001011 unsigned nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
Patrick McHardy2941a482006-01-08 22:05:26 -08001013 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014
Joe Perchesf97c1e02007-12-16 13:45:43 -08001015 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 nflags = IFF_BROADCAST;
1017 else if (p.iph.daddr)
1018 nflags = IFF_POINTOPOINT;
1019
1020 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021 err = -EINVAL;
1022 break;
1023 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001024 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 t->parms.iph.saddr = p.iph.saddr;
1026 t->parms.iph.daddr = p.iph.daddr;
1027 t->parms.i_key = p.i_key;
1028 t->parms.o_key = p.o_key;
1029 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001031 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 netdev_state_change(dev);
1033 }
1034 }
1035
1036 if (t) {
1037 err = 0;
1038 if (cmd == SIOCCHGTUNNEL) {
1039 t->parms.iph.ttl = p.iph.ttl;
1040 t->parms.iph.tos = p.iph.tos;
1041 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001044 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001045 netdev_state_change(dev);
1046 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 }
1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049 err = -EFAULT;
1050 } else
1051 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052 break;
1053
1054 case SIOCDELTUNNEL:
1055 err = -EPERM;
1056 if (!capable(CAP_NET_ADMIN))
1057 goto done;
1058
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001059 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 err = -EFAULT;
1061 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062 goto done;
1063 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001064 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 goto done;
1066 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001067 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 goto done;
1069 dev = t->dev;
1070 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001071 unregister_netdevice(dev);
1072 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 break;
1074
1075 default:
1076 err = -EINVAL;
1077 }
1078
1079done:
1080 return err;
1081}
1082
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084{
Patrick McHardy2941a482006-01-08 22:05:26 -08001085 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001086 if (new_mtu < 68 ||
1087 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 return -EINVAL;
1089 dev->mtu = new_mtu;
1090 return 0;
1091}
1092
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001102
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1104 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1105
1106 ping -t 255 224.66.66.66
1107
1108 If nobody answers, mbone does not work.
1109
1110 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1111 ip addr add 10.66.66.<somewhat>/24 dev Universe
1112 ifconfig Universe up
1113 ifconfig Universe add fe80::<Your_real_addr>/10
1114 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1115 ftp 10.66.66.66
1116 ...
1117 ftp fec0:6666:6666::193.233.7.65
1118 ...
1119
1120 */
1121
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001122static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123 unsigned short type,
1124 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125{
Patrick McHardy2941a482006-01-08 22:05:26 -08001126 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001128 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
1130 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131 p[0] = t->parms.o_flags;
1132 p[1] = htons(type);
1133
1134 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001135 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001137
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (saddr)
1139 memcpy(&iph->saddr, saddr, 4);
1140
1141 if (daddr) {
1142 memcpy(&iph->daddr, daddr, 4);
1143 return t->hlen;
1144 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001145 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001147
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 return -t->hlen;
1149}
1150
Timo Teras6a5f44d2007-10-23 20:31:53 -07001151static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152{
Jianjun Kong6ed25332008-11-03 00:25:16 -08001153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001154 memcpy(haddr, &iph->saddr, 4);
1155 return 4;
1156}
1157
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001158static const struct header_ops ipgre_header_ops = {
1159 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001160 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001161};
1162
Timo Teras6a5f44d2007-10-23 20:31:53 -07001163#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164static int ipgre_open(struct net_device *dev)
1165{
Patrick McHardy2941a482006-01-08 22:05:26 -08001166 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
Joe Perchesf97c1e02007-12-16 13:45:43 -08001168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 struct flowi fl = { .oif = t->parms.link,
1170 .nl_u = { .ip4_u =
1171 { .daddr = t->parms.iph.daddr,
1172 .saddr = t->parms.iph.saddr,
1173 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE };
1175 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev;
1179 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001180 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 return -EADDRNOTAVAIL;
1182 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 }
1185 return 0;
1186}
1187
1188static int ipgre_close(struct net_device *dev)
1189{
Patrick McHardy2941a482006-01-08 22:05:26 -08001190 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001191
Joe Perchesf97c1e02007-12-16 13:45:43 -08001192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001193 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 if (in_dev) {
1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197 in_dev_put(in_dev);
1198 }
1199 }
1200 return 0;
1201}
1202
1203#endif
1204
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001205static const struct net_device_ops ipgre_netdev_ops = {
1206 .ndo_init = ipgre_tunnel_init,
1207 .ndo_uninit = ipgre_tunnel_uninit,
1208#ifdef CONFIG_NET_IPGRE_BROADCAST
1209 .ndo_open = ipgre_open,
1210 .ndo_stop = ipgre_close,
1211#endif
1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1215};
1216
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217static void ipgre_tunnel_setup(struct net_device *dev)
1218{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001219 dev->netdev_ops = &ipgre_netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 dev->destructor = free_netdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
1222 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 dev->flags = IFF_NOARP;
1226 dev->iflink = 0;
1227 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001228 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229}
1230
1231static int ipgre_tunnel_init(struct net_device *dev)
1232{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 struct ip_tunnel *tunnel;
1234 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
Patrick McHardy2941a482006-01-08 22:05:26 -08001236 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 iph = &tunnel->parms.iph;
1238
1239 tunnel->dev = dev;
1240 strcpy(tunnel->parms.name, dev->name);
1241
1242 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1243 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1244
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001247 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 if (!iph->saddr)
1249 return -EINVAL;
1250 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001251 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 }
1253#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001254 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001255 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 return 0;
1258}
1259
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001260static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261{
Patrick McHardy2941a482006-01-08 22:05:26 -08001262 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001264 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
1266 tunnel->dev = dev;
1267 strcpy(tunnel->parms.name, dev->name);
1268
1269 iph->version = 4;
1270 iph->protocol = IPPROTO_GRE;
1271 iph->ihl = 5;
1272 tunnel->hlen = sizeof(struct iphdr) + 4;
1273
1274 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001275 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276}
1277
1278
1279static struct net_protocol ipgre_protocol = {
1280 .handler = ipgre_rcv,
1281 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001282 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283};
1284
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001285static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1286{
1287 int prio;
1288
1289 for (prio = 0; prio < 4; prio++) {
1290 int h;
1291 for (h = 0; h < HASH_SIZE; h++) {
1292 struct ip_tunnel *t;
1293 while ((t = ign->tunnels[prio][h]) != NULL)
1294 unregister_netdevice(t->dev);
1295 }
1296 }
1297}
1298
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001299static int ipgre_init_net(struct net *net)
1300{
1301 int err;
1302 struct ipgre_net *ign;
1303
1304 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001305 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001306 if (ign == NULL)
1307 goto err_alloc;
1308
1309 err = net_assign_generic(net, ipgre_net_id, ign);
1310 if (err < 0)
1311 goto err_assign;
1312
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001313 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1314 ipgre_tunnel_setup);
1315 if (!ign->fb_tunnel_dev) {
1316 err = -ENOMEM;
1317 goto err_alloc_dev;
1318 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001319 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001320
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001321 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001322 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001323
1324 if ((err = register_netdev(ign->fb_tunnel_dev)))
1325 goto err_reg_dev;
1326
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001327 return 0;
1328
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001329err_reg_dev:
1330 free_netdev(ign->fb_tunnel_dev);
1331err_alloc_dev:
1332 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001333err_assign:
1334 kfree(ign);
1335err_alloc:
1336 return err;
1337}
1338
1339static void ipgre_exit_net(struct net *net)
1340{
1341 struct ipgre_net *ign;
1342
1343 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001344 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001345 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001346 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001347 kfree(ign);
1348}
1349
1350static struct pernet_operations ipgre_net_ops = {
1351 .init = ipgre_init_net,
1352 .exit = ipgre_exit_net,
1353};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
Herbert Xuc19e6542008-10-09 11:59:55 -07001355static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1356{
1357 __be16 flags;
1358
1359 if (!data)
1360 return 0;
1361
1362 flags = 0;
1363 if (data[IFLA_GRE_IFLAGS])
1364 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365 if (data[IFLA_GRE_OFLAGS])
1366 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367 if (flags & (GRE_VERSION|GRE_ROUTING))
1368 return -EINVAL;
1369
1370 return 0;
1371}
1372
Herbert Xue1a80002008-10-09 12:00:17 -07001373static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1374{
1375 __be32 daddr;
1376
1377 if (tb[IFLA_ADDRESS]) {
1378 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1379 return -EINVAL;
1380 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1381 return -EADDRNOTAVAIL;
1382 }
1383
1384 if (!data)
1385 goto out;
1386
1387 if (data[IFLA_GRE_REMOTE]) {
1388 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1389 if (!daddr)
1390 return -EINVAL;
1391 }
1392
1393out:
1394 return ipgre_tunnel_validate(tb, data);
1395}
1396
Herbert Xuc19e6542008-10-09 11:59:55 -07001397static void ipgre_netlink_parms(struct nlattr *data[],
1398 struct ip_tunnel_parm *parms)
1399{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001400 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001401
1402 parms->iph.protocol = IPPROTO_GRE;
1403
1404 if (!data)
1405 return;
1406
1407 if (data[IFLA_GRE_LINK])
1408 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1409
1410 if (data[IFLA_GRE_IFLAGS])
1411 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1412
1413 if (data[IFLA_GRE_OFLAGS])
1414 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1415
1416 if (data[IFLA_GRE_IKEY])
1417 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1418
1419 if (data[IFLA_GRE_OKEY])
1420 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1421
1422 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001423 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001424
1425 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001426 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001427
1428 if (data[IFLA_GRE_TTL])
1429 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1430
1431 if (data[IFLA_GRE_TOS])
1432 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1433
1434 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1435 parms->iph.frag_off = htons(IP_DF);
1436}
1437
Herbert Xue1a80002008-10-09 12:00:17 -07001438static int ipgre_tap_init(struct net_device *dev)
1439{
1440 struct ip_tunnel *tunnel;
1441
1442 tunnel = netdev_priv(dev);
1443
1444 tunnel->dev = dev;
1445 strcpy(tunnel->parms.name, dev->name);
1446
1447 ipgre_tunnel_bind_dev(dev);
1448
1449 return 0;
1450}
1451
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001452static const struct net_device_ops ipgre_tap_netdev_ops = {
1453 .ndo_init = ipgre_tap_init,
1454 .ndo_uninit = ipgre_tunnel_uninit,
1455 .ndo_start_xmit = ipgre_tunnel_xmit,
1456 .ndo_set_mac_address = eth_mac_addr,
1457 .ndo_validate_addr = eth_validate_addr,
1458 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1459};
1460
Herbert Xue1a80002008-10-09 12:00:17 -07001461static void ipgre_tap_setup(struct net_device *dev)
1462{
1463
1464 ether_setup(dev);
1465
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001466 dev->netdev_ops = &ipgre_netdev_ops;
Herbert Xue1a80002008-10-09 12:00:17 -07001467 dev->destructor = free_netdev;
Herbert Xue1a80002008-10-09 12:00:17 -07001468
1469 dev->iflink = 0;
1470 dev->features |= NETIF_F_NETNS_LOCAL;
1471}
1472
Herbert Xuc19e6542008-10-09 11:59:55 -07001473static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1474 struct nlattr *data[])
1475{
1476 struct ip_tunnel *nt;
1477 struct net *net = dev_net(dev);
1478 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1479 int mtu;
1480 int err;
1481
1482 nt = netdev_priv(dev);
1483 ipgre_netlink_parms(data, &nt->parms);
1484
Herbert Xue1a80002008-10-09 12:00:17 -07001485 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001486 return -EEXIST;
1487
Herbert Xue1a80002008-10-09 12:00:17 -07001488 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1489 random_ether_addr(dev->dev_addr);
1490
Herbert Xuc19e6542008-10-09 11:59:55 -07001491 mtu = ipgre_tunnel_bind_dev(dev);
1492 if (!tb[IFLA_MTU])
1493 dev->mtu = mtu;
1494
1495 err = register_netdevice(dev);
1496 if (err)
1497 goto out;
1498
1499 dev_hold(dev);
1500 ipgre_tunnel_link(ign, nt);
1501
1502out:
1503 return err;
1504}
1505
1506static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1507 struct nlattr *data[])
1508{
1509 struct ip_tunnel *t, *nt;
1510 struct net *net = dev_net(dev);
1511 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1512 struct ip_tunnel_parm p;
1513 int mtu;
1514
1515 if (dev == ign->fb_tunnel_dev)
1516 return -EINVAL;
1517
1518 nt = netdev_priv(dev);
1519 ipgre_netlink_parms(data, &p);
1520
1521 t = ipgre_tunnel_locate(net, &p, 0);
1522
1523 if (t) {
1524 if (t->dev != dev)
1525 return -EEXIST;
1526 } else {
1527 unsigned nflags = 0;
1528
1529 t = nt;
1530
1531 if (ipv4_is_multicast(p.iph.daddr))
1532 nflags = IFF_BROADCAST;
1533 else if (p.iph.daddr)
1534 nflags = IFF_POINTOPOINT;
1535
1536 if ((dev->flags ^ nflags) &
1537 (IFF_POINTOPOINT | IFF_BROADCAST))
1538 return -EINVAL;
1539
1540 ipgre_tunnel_unlink(ign, t);
1541 t->parms.iph.saddr = p.iph.saddr;
1542 t->parms.iph.daddr = p.iph.daddr;
1543 t->parms.i_key = p.i_key;
1544 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1545 memcpy(dev->broadcast, &p.iph.daddr, 4);
1546 ipgre_tunnel_link(ign, t);
1547 netdev_state_change(dev);
1548 }
1549
1550 t->parms.o_key = p.o_key;
1551 t->parms.iph.ttl = p.iph.ttl;
1552 t->parms.iph.tos = p.iph.tos;
1553 t->parms.iph.frag_off = p.iph.frag_off;
1554
1555 if (t->parms.link != p.link) {
1556 t->parms.link = p.link;
1557 mtu = ipgre_tunnel_bind_dev(dev);
1558 if (!tb[IFLA_MTU])
1559 dev->mtu = mtu;
1560 netdev_state_change(dev);
1561 }
1562
1563 return 0;
1564}
1565
1566static size_t ipgre_get_size(const struct net_device *dev)
1567{
1568 return
1569 /* IFLA_GRE_LINK */
1570 nla_total_size(4) +
1571 /* IFLA_GRE_IFLAGS */
1572 nla_total_size(2) +
1573 /* IFLA_GRE_OFLAGS */
1574 nla_total_size(2) +
1575 /* IFLA_GRE_IKEY */
1576 nla_total_size(4) +
1577 /* IFLA_GRE_OKEY */
1578 nla_total_size(4) +
1579 /* IFLA_GRE_LOCAL */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_REMOTE */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_TTL */
1584 nla_total_size(1) +
1585 /* IFLA_GRE_TOS */
1586 nla_total_size(1) +
1587 /* IFLA_GRE_PMTUDISC */
1588 nla_total_size(1) +
1589 0;
1590}
1591
1592static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1593{
1594 struct ip_tunnel *t = netdev_priv(dev);
1595 struct ip_tunnel_parm *p = &t->parms;
1596
1597 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1598 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1599 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
Patrick McHardyba9e64b2008-10-10 12:10:30 -07001600 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1601 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001602 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1603 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
Herbert Xuc19e6542008-10-09 11:59:55 -07001604 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1605 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1606 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1607
1608 return 0;
1609
1610nla_put_failure:
1611 return -EMSGSIZE;
1612}
1613
1614static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1615 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1616 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1617 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1618 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1619 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001620 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1621 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001622 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1623 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1624 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1625};
1626
1627static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1628 .kind = "gre",
1629 .maxtype = IFLA_GRE_MAX,
1630 .policy = ipgre_policy,
1631 .priv_size = sizeof(struct ip_tunnel),
1632 .setup = ipgre_tunnel_setup,
1633 .validate = ipgre_tunnel_validate,
1634 .newlink = ipgre_newlink,
1635 .changelink = ipgre_changelink,
1636 .get_size = ipgre_get_size,
1637 .fill_info = ipgre_fill_info,
1638};
1639
Herbert Xue1a80002008-10-09 12:00:17 -07001640static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1641 .kind = "gretap",
1642 .maxtype = IFLA_GRE_MAX,
1643 .policy = ipgre_policy,
1644 .priv_size = sizeof(struct ip_tunnel),
1645 .setup = ipgre_tap_setup,
1646 .validate = ipgre_tap_validate,
1647 .newlink = ipgre_newlink,
1648 .changelink = ipgre_changelink,
1649 .get_size = ipgre_get_size,
1650 .fill_info = ipgre_fill_info,
1651};
1652
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653/*
1654 * And now the modules code and kernel interface.
1655 */
1656
1657static int __init ipgre_init(void)
1658{
1659 int err;
1660
1661 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1662
1663 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1664 printk(KERN_INFO "ipgre init: can't add protocol\n");
1665 return -EAGAIN;
1666 }
1667
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001668 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1669 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001670 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001671
Herbert Xuc19e6542008-10-09 11:59:55 -07001672 err = rtnl_link_register(&ipgre_link_ops);
1673 if (err < 0)
1674 goto rtnl_link_failed;
1675
Herbert Xue1a80002008-10-09 12:00:17 -07001676 err = rtnl_link_register(&ipgre_tap_ops);
1677 if (err < 0)
1678 goto tap_ops_failed;
1679
Herbert Xuc19e6542008-10-09 11:59:55 -07001680out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001682
Herbert Xue1a80002008-10-09 12:00:17 -07001683tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001685rtnl_link_failed:
1686 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1687gen_device_failed:
1688 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1689 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690}
1691
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001692static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693{
Herbert Xue1a80002008-10-09 12:00:17 -07001694 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001695 rtnl_link_unregister(&ipgre_link_ops);
1696 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1698 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");