blob: 1433338526248bdac71566c94e01c1bcd731f190 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
Eric Dumazeta43912a2009-09-23 10:28:33 +000069 Current solution: HARD_TX_LOCK lock breaks dead loops.
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
/* rtnetlink ops that ipgre_tunnel_locate() attaches to every device it creates. */
Herbert Xuc19e6542008-10-09 11:59:55 -0700119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
/*
 * Forward declarations. ipgre_tunnel_setup() and ipgre_tunnel_bind_dev()
 * are called from ipgre_tunnel_locate() before their definitions.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700122static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
/* Number of buckets in each tunnel hash table; HASH() yields indexes 0..15. */
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700126#define HASH_SIZE 16
127
/* Id passed to net_generic() to fetch this file's per-namespace struct ipgre_net. */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700128static int ipgre_net_id;
/* Per-network-namespace GRE state. */
129struct ipgre_net {
/*
 * Four hash tables of tunnel chains. The first index is the priority
 * computed by __ipgre_bucket() (bit 0: local address set, bit 1: unicast
 * remote address set); the second index is the hash bucket.
 */
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
/*
 * Fallback device: ipgre_tunnel_lookup() returns its private data when
 * no configured tunnel matches and this device is up.
 */
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700132 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133};
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
149 All keyless packets, if not matched by configured keyless tunnels
150 will match fallback tunnel.
151 */
152
/*
 * Fold an address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesised so that an expression argument such as
 * HASH(a | b) is cast and shifted as a whole, and the mask is derived from
 * HASH_SIZE rather than repeating the table size as a literal 0xF
 * (HASH_SIZE must remain a power of two).
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Names for the four per-namespace tables, indexed as in __ipgre_bucket(). */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
/*
 * Protects the tunnel hash chains: taken for writing by
 * ipgre_tunnel_link()/ipgre_tunnel_unlink() when a chain head or link is
 * changed, and for reading by ipgre_err()/ipgre_rcv() around lookups.
 */
160static DEFINE_RWLOCK(ipgre_lock);
161
162/* Given src, dst and key, find appropriate for input tunnel. */
163
Timo Teras749c10f2009-01-19 17:22:12 -0800164static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700165 __be32 remote, __be32 local,
166 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167{
Timo Teras749c10f2009-01-19 17:22:12 -0800168 struct net *net = dev_net(dev);
169 int link = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 unsigned h0 = HASH(remote);
171 unsigned h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800172 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700173 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700174 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
175 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800176 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800179 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key ||
182 !(t->dev->flags & IFF_UP))
183 continue;
184
185 if (t->dev->type != ARPHRD_IPGRE &&
186 t->dev->type != dev_type)
187 continue;
188
Timo Terasafcf1242009-01-26 20:56:10 -0800189 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800190 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800191 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800192 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800193 score |= 2;
194 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800195 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800196
197 if (score < cand_score) {
198 cand = t;
199 cand_score = score;
200 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 }
Herbert Xue1a80002008-10-09 12:00:17 -0700202
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
208
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
212
Timo Terasafcf1242009-01-26 20:56:10 -0800213 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800214 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800215 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800216 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800217 score |= 2;
218 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800219 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800220
221 if (score < cand_score) {
222 cand = t;
223 cand_score = score;
224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 }
Herbert Xue1a80002008-10-09 12:00:17 -0700226
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700227 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800228 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) ||
231 key != t->parms.i_key ||
232 !(t->dev->flags & IFF_UP))
233 continue;
234
235 if (t->dev->type != ARPHRD_IPGRE &&
236 t->dev->type != dev_type)
237 continue;
238
Timo Terasafcf1242009-01-26 20:56:10 -0800239 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800240 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800241 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800242 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800243 score |= 2;
244 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800245 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800246
247 if (score < cand_score) {
248 cand = t;
249 cand_score = score;
250 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 }
Herbert Xue1a80002008-10-09 12:00:17 -0700252
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700253 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800254 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP))
256 continue;
257
258 if (t->dev->type != ARPHRD_IPGRE &&
259 t->dev->type != dev_type)
260 continue;
261
Timo Terasafcf1242009-01-26 20:56:10 -0800262 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800263 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800264 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800265 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800266 score |= 2;
267 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800268 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800269
270 if (score < cand_score) {
271 cand = t;
272 cand_score = score;
273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 }
275
Timo Terasafcf1242009-01-26 20:56:10 -0800276 if (cand != NULL)
277 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700278
Timo Teras749c10f2009-01-19 17:22:12 -0800279 if (ign->fb_tunnel_dev->flags & IFF_UP)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700280 return netdev_priv(ign->fb_tunnel_dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800281
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return NULL;
283}
284
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700285static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
286 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900288 __be32 remote = parms->iph.daddr;
289 __be32 local = parms->iph.saddr;
290 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 unsigned h = HASH(key);
292 int prio = 0;
293
294 if (local)
295 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800296 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 prio |= 2;
298 h ^= HASH(remote);
299 }
300
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700301 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302}
303
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700304static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
305 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900306{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700307 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900308}
309
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700310static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700312 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314 t->next = *tp;
315 write_lock_bh(&ipgre_lock);
316 *tp = t;
317 write_unlock_bh(&ipgre_lock);
318}
319
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700320static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321{
322 struct ip_tunnel **tp;
323
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 if (t == *tp) {
326 write_lock_bh(&ipgre_lock);
327 *tp = t->next;
328 write_unlock_bh(&ipgre_lock);
329 break;
330 }
331 }
332}
333
Herbert Xue1a80002008-10-09 12:00:17 -0700334static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
335 struct ip_tunnel_parm *parms,
336 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
Al Virod5a0a1e2006-11-08 00:23:14 -0800338 __be32 remote = parms->iph.daddr;
339 __be32 local = parms->iph.saddr;
340 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800341 int link = parms->link;
Herbert Xue1a80002008-10-09 12:00:17 -0700342 struct ip_tunnel *t, **tp;
343 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
344
345 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
346 if (local == t->parms.iph.saddr &&
347 remote == t->parms.iph.daddr &&
348 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800349 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700350 type == t->dev->type)
351 break;
352
353 return t;
354}
355
356static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
357 struct ip_tunnel_parm *parms, int create)
358{
359 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700362 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
Herbert Xue1a80002008-10-09 12:00:17 -0700364 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
365 if (t || !create)
366 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
368 if (parms->name[0])
369 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800370 else
371 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
373 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
374 if (!dev)
375 return NULL;
376
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700377 dev_net_set(dev, net);
378
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800379 if (strchr(name, '%')) {
380 if (dev_alloc_name(dev, name) < 0)
381 goto failed_free;
382 }
383
Patrick McHardy2941a482006-01-08 22:05:26 -0800384 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700386 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
Herbert Xu42aa9162008-10-09 11:59:32 -0700388 dev->mtu = ipgre_tunnel_bind_dev(dev);
389
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800390 if (register_netdevice(dev) < 0)
391 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700394 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return nt;
396
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800397failed_free:
398 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 return NULL;
400}
401
402static void ipgre_tunnel_uninit(struct net_device *dev)
403{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700404 struct net *net = dev_net(dev);
405 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
406
407 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 dev_put(dev);
409}
410
411
412static void ipgre_err(struct sk_buff *skb, u32 info)
413{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
Rami Rosen071f92d2008-05-21 17:47:54 -0700415/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 8 bytes of packet payload. It means, that precise relaying of
417 ICMP in the real Internet is absolutely infeasible.
418
419 Moreover, Cisco "wise men" put GRE key to the third word
420 in GRE header. It makes impossible maintaining even soft state for keyed
421 GRE tunnels with enabled checksum. Tell them "thank you".
422
423 Well, I wonder, rfc1812 was written by Cisco employee,
424 what the hell these idiots break standrads established
425 by themself???
426 */
427
Jianjun Kong6ed25332008-11-03 00:25:16 -0800428 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800429 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300431 const int type = icmp_hdr(skb)->type;
432 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800434 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
436 flags = p[0];
437 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
438 if (flags&(GRE_VERSION|GRE_ROUTING))
439 return;
440 if (flags&GRE_KEY) {
441 grehlen += 4;
442 if (flags&GRE_CSUM)
443 grehlen += 4;
444 }
445 }
446
447 /* If only 8 bytes returned, keyed message will be dropped here */
448 if (skb_headlen(skb) < grehlen)
449 return;
450
451 switch (type) {
452 default:
453 case ICMP_PARAMETERPROB:
454 return;
455
456 case ICMP_DEST_UNREACH:
457 switch (code) {
458 case ICMP_SR_FAILED:
459 case ICMP_PORT_UNREACH:
460 /* Impossible event. */
461 return;
462 case ICMP_FRAG_NEEDED:
463 /* Soft state for pmtu is maintained by IP core. */
464 return;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe they are just ether pollution. --ANK
469 */
470 break;
471 }
472 break;
473 case ICMP_TIME_EXCEEDED:
474 if (code != ICMP_EXC_TTL)
475 return;
476 break;
477 }
478
479 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700481 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
483 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800484 if (t == NULL || t->parms.iph.daddr == 0 ||
485 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 goto out;
487
488 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
489 goto out;
490
Wei Yongjunda6185d82009-02-24 23:34:48 -0800491 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 t->err_count++;
493 else
494 t->err_count = 1;
495 t->err_time = jiffies;
496out:
497 read_unlock(&ipgre_lock);
498 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499}
500
501static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
502{
503 if (INET_ECN_is_ce(iph->tos)) {
504 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700505 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700507 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 }
509 }
510}
511
512static inline u8
513ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
514{
515 u8 inner = 0;
516 if (skb->protocol == htons(ETH_P_IP))
517 inner = old_iph->tos;
518 else if (skb->protocol == htons(ETH_P_IPV6))
519 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
520 return INET_ECN_encapsulate(tos, inner);
521}
522
523static int ipgre_rcv(struct sk_buff *skb)
524{
525 struct iphdr *iph;
526 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800527 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800528 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800529 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 u32 seqno = 0;
531 struct ip_tunnel *tunnel;
532 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700533 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700534 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535
536 if (!pskb_may_pull(skb, 16))
537 goto drop_nolock;
538
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700539 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800541 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
543 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
544 /* - Version must be 0.
545 - We do not support routing headers.
546 */
547 if (flags&(GRE_VERSION|GRE_ROUTING))
548 goto drop_nolock;
549
550 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800551 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700552 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800553 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800554 if (!csum)
555 break;
556 /* fall through */
557 case CHECKSUM_NONE:
558 skb->csum = 0;
559 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700560 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 }
562 offset += 4;
563 }
564 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800565 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 offset += 4;
567 }
568 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800569 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 offset += 4;
571 }
572 }
573
Herbert Xue1a80002008-10-09 12:00:17 -0700574 gre_proto = *(__be16 *)(h + 2);
575
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800577 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700578 iph->saddr, iph->daddr, key,
579 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700580 struct net_device_stats *stats = &tunnel->dev->stats;
581
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 secpath_reset(skb);
583
Herbert Xue1a80002008-10-09 12:00:17 -0700584 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 /* WCCP version 1 and 2 protocol decoding.
586 * - Change protocol to IP
587 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
588 */
Herbert Xue1a80002008-10-09 12:00:17 -0700589 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700590 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900591 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 offset += 4;
593 }
594
Timo Teras1d069162007-12-20 00:10:33 -0800595 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300596 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700597 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 skb->pkt_type = PACKET_HOST;
599#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800600 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 /* Looped back packet, drop it! */
Eric Dumazet511c3f92009-06-02 05:14:27 +0000602 if (skb_rtable(skb)->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700604 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 skb->pkt_type = PACKET_BROADCAST;
606 }
607#endif
608
609 if (((flags&GRE_CSUM) && csum) ||
610 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700611 stats->rx_crc_errors++;
612 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 goto drop;
614 }
615 if (tunnel->parms.i_flags&GRE_SEQ) {
616 if (!(flags&GRE_SEQ) ||
617 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700618 stats->rx_fifo_errors++;
619 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 goto drop;
621 }
622 tunnel->i_seqno = seqno + 1;
623 }
Herbert Xue1a80002008-10-09 12:00:17 -0700624
Herbert Xu64194c32008-10-09 12:03:17 -0700625 len = skb->len;
626
Herbert Xue1a80002008-10-09 12:00:17 -0700627 /* Warning: All skb pointers will be invalidated! */
628 if (tunnel->dev->type == ARPHRD_ETHER) {
629 if (!pskb_may_pull(skb, ETH_HLEN)) {
630 stats->rx_length_errors++;
631 stats->rx_errors++;
632 goto drop;
633 }
634
635 iph = ip_hdr(skb);
636 skb->protocol = eth_type_trans(skb, tunnel->dev);
637 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
638 }
639
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700640 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700641 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 skb->dev = tunnel->dev;
Eric Dumazetadf30902009-06-02 05:19:30 +0000643 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700645
646 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700648
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 netif_rx(skb);
650 read_unlock(&ipgre_lock);
651 return(0);
652 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
655drop:
656 read_unlock(&ipgre_lock);
657drop_nolock:
658 kfree_skb(skb);
659 return(0);
660}
661
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663{
Patrick McHardy2941a482006-01-08 22:05:26 -0800664 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700665 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700666 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 struct iphdr *tiph;
668 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800669 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700673 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800675 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 int mtu;
677
Herbert Xue1a80002008-10-09 12:00:17 -0700678 if (dev->type == ARPHRD_ETHER)
679 IPCB(skb)->flags = 0;
680
681 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 gre_hlen = 0;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800683 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 } else {
685 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph;
687 }
688
689 if ((dst = tiph->daddr) == 0) {
690 /* NBMA tunnel */
691
Eric Dumazetadf30902009-06-02 05:19:30 +0000692 if (skb_dst(skb) == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700693 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 goto tx_error;
695 }
696
697 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000698 rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 if ((dst = rt->rt_gateway) == 0)
700 goto tx_error_icmp;
701 }
702#ifdef CONFIG_IPV6
703 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6;
705 int addr_type;
Eric Dumazetadf30902009-06-02 05:19:30 +0000706 struct neighbour *neigh = skb_dst(skb)->neighbour;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707
708 if (neigh == NULL)
709 goto tx_error;
710
Jianjun Kong6ed25332008-11-03 00:25:16 -0800711 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 addr_type = ipv6_addr_type(addr6);
713
714 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700715 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 addr_type = ipv6_addr_type(addr6);
717 }
718
719 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720 goto tx_error_icmp;
721
722 dst = addr6->s6_addr32[3];
723 }
724#endif
725 else
726 goto tx_error;
727 }
728
729 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700730 if (tos == 1) {
731 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 }
735
736 {
737 struct flowi fl = { .oif = tunnel->parms.link,
738 .nl_u = { .ip4_u =
739 { .daddr = dst,
740 .saddr = tiph->saddr,
741 .tos = RT_TOS(tos) } },
742 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700743 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700744 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 goto tx_error;
746 }
747 }
748 tdev = rt->u.dst.dev;
749
750 if (tdev == dev) {
751 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700752 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 goto tx_error;
754 }
755
756 df = tiph->frag_off;
757 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700758 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000760 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761
Eric Dumazetadf30902009-06-02 05:19:30 +0000762 if (skb_dst(skb))
763 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
765 if (skb->protocol == htons(ETH_P_IP)) {
766 df |= (old_iph->frag_off&htons(IP_DF));
767
768 if ((old_iph->frag_off&htons(IP_DF)) &&
769 mtu < ntohs(old_iph->tot_len)) {
770 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
771 ip_rt_put(rt);
772 goto tx_error;
773 }
774 }
775#ifdef CONFIG_IPV6
776 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000777 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778
Eric Dumazetadf30902009-06-02 05:19:30 +0000779 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800780 if ((tunnel->parms.iph.daddr &&
781 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 rt6->rt6i_dst.plen == 128) {
783 rt6->rt6i_flags |= RTF_MODIFIED;
Eric Dumazetadf30902009-06-02 05:19:30 +0000784 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 }
786 }
787
788 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
789 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
790 ip_rt_put(rt);
791 goto tx_error;
792 }
793 }
794#endif
795
796 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800797 if (time_before(jiffies,
798 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 tunnel->err_count--;
800
801 dst_link_failure(skb);
802 } else
803 tunnel->err_count = 0;
804 }
805
806 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
807
Patrick McHardycfbba492007-07-09 15:33:40 -0700808 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811 if (!new_skb) {
812 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900813 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000815 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 }
817 if (skb->sk)
818 skb_set_owner_w(new_skb, skb->sk);
819 dev_kfree_skb(skb);
820 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700821 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 }
823
Herbert Xu64194c32008-10-09 12:03:17 -0700824 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700825 skb_push(skb, gre_hlen);
826 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800828 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
829 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000830 skb_dst_drop(skb);
831 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
833 /*
834 * Push down and install the IPIP header.
835 */
836
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700837 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 iph->version = 4;
839 iph->ihl = sizeof(struct iphdr) >> 2;
840 iph->frag_off = df;
841 iph->protocol = IPPROTO_GRE;
842 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
843 iph->daddr = rt->rt_dst;
844 iph->saddr = rt->rt_src;
845
846 if ((iph->ttl = tiph->ttl) == 0) {
847 if (skb->protocol == htons(ETH_P_IP))
848 iph->ttl = old_iph->ttl;
849#ifdef CONFIG_IPV6
850 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed25332008-11-03 00:25:16 -0800851 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852#endif
853 else
854 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
855 }
856
Herbert Xue1a80002008-10-09 12:00:17 -0700857 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
858 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
859 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
861 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800862 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863
864 if (tunnel->parms.o_flags&GRE_SEQ) {
865 ++tunnel->o_seqno;
866 *ptr = htonl(tunnel->o_seqno);
867 ptr--;
868 }
869 if (tunnel->parms.o_flags&GRE_KEY) {
870 *ptr = tunnel->parms.o_key;
871 ptr--;
872 }
873 if (tunnel->parms.o_flags&GRE_CSUM) {
874 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800875 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 }
877 }
878
879 nf_reset(skb);
880
881 IPTUNNEL_XMIT();
Patrick McHardy6ed10652009-06-23 06:03:08 +0000882 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884tx_error_icmp:
885 dst_link_failure(skb);
886
887tx_error:
888 stats->tx_errors++;
889 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000890 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891}
892
Herbert Xu42aa9162008-10-09 11:59:32 -0700893static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800894{
895 struct net_device *tdev = NULL;
896 struct ip_tunnel *tunnel;
897 struct iphdr *iph;
898 int hlen = LL_MAX_HEADER;
899 int mtu = ETH_DATA_LEN;
900 int addend = sizeof(struct iphdr) + 4;
901
902 tunnel = netdev_priv(dev);
903 iph = &tunnel->parms.iph;
904
Herbert Xuc95b8192008-10-09 11:58:54 -0700905 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800906
907 if (iph->daddr) {
908 struct flowi fl = { .oif = tunnel->parms.link,
909 .nl_u = { .ip4_u =
910 { .daddr = iph->daddr,
911 .saddr = iph->saddr,
912 .tos = RT_TOS(iph->tos) } },
913 .proto = IPPROTO_GRE };
914 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700915 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800916 tdev = rt->u.dst.dev;
917 ip_rt_put(rt);
918 }
Herbert Xue1a80002008-10-09 12:00:17 -0700919
920 if (dev->type != ARPHRD_ETHER)
921 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800922 }
923
924 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700925 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800926
927 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700928 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800929 mtu = tdev->mtu;
930 }
931 dev->iflink = tunnel->parms.link;
932
933 /* Precalculate GRE options length */
934 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935 if (tunnel->parms.o_flags&GRE_CSUM)
936 addend += 4;
937 if (tunnel->parms.o_flags&GRE_KEY)
938 addend += 4;
939 if (tunnel->parms.o_flags&GRE_SEQ)
940 addend += 4;
941 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700942 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -0700943 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -0700944
945 if (mtu < 68)
946 mtu = 68;
947
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800948 tunnel->hlen = addend;
949
Herbert Xu42aa9162008-10-09 11:59:32 -0700950 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800951}
952
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953static int
954ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
955{
956 int err = 0;
957 struct ip_tunnel_parm p;
958 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700959 struct net *net = dev_net(dev);
960 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962 switch (cmd) {
963 case SIOCGETTUNNEL:
964 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700965 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967 err = -EFAULT;
968 break;
969 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700970 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 }
972 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800973 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 memcpy(&p, &t->parms, sizeof(p));
975 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976 err = -EFAULT;
977 break;
978
979 case SIOCADDTUNNEL:
980 case SIOCCHGTUNNEL:
981 err = -EPERM;
982 if (!capable(CAP_NET_ADMIN))
983 goto done;
984
985 err = -EFAULT;
986 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987 goto done;
988
989 err = -EINVAL;
990 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993 goto done;
994 if (p.iph.ttl)
995 p.iph.frag_off |= htons(IP_DF);
996
997 if (!(p.i_flags&GRE_KEY))
998 p.i_key = 0;
999 if (!(p.o_flags&GRE_KEY))
1000 p.o_key = 0;
1001
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001002 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001004 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 if (t != NULL) {
1006 if (t->dev != dev) {
1007 err = -EEXIST;
1008 break;
1009 }
1010 } else {
Jianjun Kong6ed25332008-11-03 00:25:16 -08001011 unsigned nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
Patrick McHardy2941a482006-01-08 22:05:26 -08001013 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014
Joe Perchesf97c1e02007-12-16 13:45:43 -08001015 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 nflags = IFF_BROADCAST;
1017 else if (p.iph.daddr)
1018 nflags = IFF_POINTOPOINT;
1019
1020 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021 err = -EINVAL;
1022 break;
1023 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001024 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 t->parms.iph.saddr = p.iph.saddr;
1026 t->parms.iph.daddr = p.iph.daddr;
1027 t->parms.i_key = p.i_key;
1028 t->parms.o_key = p.o_key;
1029 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001031 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 netdev_state_change(dev);
1033 }
1034 }
1035
1036 if (t) {
1037 err = 0;
1038 if (cmd == SIOCCHGTUNNEL) {
1039 t->parms.iph.ttl = p.iph.ttl;
1040 t->parms.iph.tos = p.iph.tos;
1041 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001044 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001045 netdev_state_change(dev);
1046 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 }
1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049 err = -EFAULT;
1050 } else
1051 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052 break;
1053
1054 case SIOCDELTUNNEL:
1055 err = -EPERM;
1056 if (!capable(CAP_NET_ADMIN))
1057 goto done;
1058
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001059 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 err = -EFAULT;
1061 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062 goto done;
1063 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001064 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 goto done;
1066 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001067 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 goto done;
1069 dev = t->dev;
1070 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001071 unregister_netdevice(dev);
1072 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 break;
1074
1075 default:
1076 err = -EINVAL;
1077 }
1078
1079done:
1080 return err;
1081}
1082
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084{
Patrick McHardy2941a482006-01-08 22:05:26 -08001085 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001086 if (new_mtu < 68 ||
1087 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 return -EINVAL;
1089 dev->mtu = new_mtu;
1090 return 0;
1091}
1092
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1121
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001122static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123 unsigned short type,
1124 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125{
Patrick McHardy2941a482006-01-08 22:05:26 -08001126 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001128 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
1130 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131 p[0] = t->parms.o_flags;
1132 p[1] = htons(type);
1133
1134 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001135 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001137
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 if (saddr)
1139 memcpy(&iph->saddr, saddr, 4);
1140
1141 if (daddr) {
1142 memcpy(&iph->daddr, daddr, 4);
1143 return t->hlen;
1144 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001145 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001147
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 return -t->hlen;
1149}
1150
Timo Teras6a5f44d2007-10-23 20:31:53 -07001151static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152{
Jianjun Kong6ed25332008-11-03 00:25:16 -08001153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001154 memcpy(haddr, &iph->saddr, 4);
1155 return 4;
1156}
1157
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001158static const struct header_ops ipgre_header_ops = {
1159 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001160 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001161};
1162
Timo Teras6a5f44d2007-10-23 20:31:53 -07001163#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164static int ipgre_open(struct net_device *dev)
1165{
Patrick McHardy2941a482006-01-08 22:05:26 -08001166 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
Joe Perchesf97c1e02007-12-16 13:45:43 -08001168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 struct flowi fl = { .oif = t->parms.link,
1170 .nl_u = { .ip4_u =
1171 { .daddr = t->parms.iph.daddr,
1172 .saddr = t->parms.iph.saddr,
1173 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE };
1175 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev;
1179 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001180 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181 return -EADDRNOTAVAIL;
1182 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 }
1185 return 0;
1186}
1187
1188static int ipgre_close(struct net_device *dev)
1189{
Patrick McHardy2941a482006-01-08 22:05:26 -08001190 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001191
Joe Perchesf97c1e02007-12-16 13:45:43 -08001192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001193 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 if (in_dev) {
1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197 in_dev_put(in_dev);
1198 }
1199 }
1200 return 0;
1201}
1202
1203#endif
1204
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001205static const struct net_device_ops ipgre_netdev_ops = {
1206 .ndo_init = ipgre_tunnel_init,
1207 .ndo_uninit = ipgre_tunnel_uninit,
1208#ifdef CONFIG_NET_IPGRE_BROADCAST
1209 .ndo_open = ipgre_open,
1210 .ndo_stop = ipgre_close,
1211#endif
1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1215};
1216
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217static void ipgre_tunnel_setup(struct net_device *dev)
1218{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001219 dev->netdev_ops = &ipgre_netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 dev->destructor = free_netdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
1222 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 dev->flags = IFF_NOARP;
1226 dev->iflink = 0;
1227 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001228 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001229 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230}
1231
1232static int ipgre_tunnel_init(struct net_device *dev)
1233{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 struct ip_tunnel *tunnel;
1235 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236
Patrick McHardy2941a482006-01-08 22:05:26 -08001237 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 iph = &tunnel->parms.iph;
1239
1240 tunnel->dev = dev;
1241 strcpy(tunnel->parms.name, dev->name);
1242
1243 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1244 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1245
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001248 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 if (!iph->saddr)
1250 return -EINVAL;
1251 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001252 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 }
1254#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001255 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001256 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 return 0;
1259}
1260
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001261static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262{
Patrick McHardy2941a482006-01-08 22:05:26 -08001263 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001265 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266
1267 tunnel->dev = dev;
1268 strcpy(tunnel->parms.name, dev->name);
1269
1270 iph->version = 4;
1271 iph->protocol = IPPROTO_GRE;
1272 iph->ihl = 5;
1273 tunnel->hlen = sizeof(struct iphdr) + 4;
1274
1275 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001276 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277}
1278
1279
Alexey Dobriyan32613092009-09-14 12:21:47 +00001280static const struct net_protocol ipgre_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 .handler = ipgre_rcv,
1282 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001283 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284};
1285
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001286static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1287{
1288 int prio;
1289
1290 for (prio = 0; prio < 4; prio++) {
1291 int h;
1292 for (h = 0; h < HASH_SIZE; h++) {
1293 struct ip_tunnel *t;
1294 while ((t = ign->tunnels[prio][h]) != NULL)
1295 unregister_netdevice(t->dev);
1296 }
1297 }
1298}
1299
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001300static int ipgre_init_net(struct net *net)
1301{
1302 int err;
1303 struct ipgre_net *ign;
1304
1305 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001306 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001307 if (ign == NULL)
1308 goto err_alloc;
1309
1310 err = net_assign_generic(net, ipgre_net_id, ign);
1311 if (err < 0)
1312 goto err_assign;
1313
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001314 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315 ipgre_tunnel_setup);
1316 if (!ign->fb_tunnel_dev) {
1317 err = -ENOMEM;
1318 goto err_alloc_dev;
1319 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001320 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001321
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001322 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001323 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001324
1325 if ((err = register_netdev(ign->fb_tunnel_dev)))
1326 goto err_reg_dev;
1327
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001328 return 0;
1329
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001330err_reg_dev:
1331 free_netdev(ign->fb_tunnel_dev);
1332err_alloc_dev:
1333 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001334err_assign:
1335 kfree(ign);
1336err_alloc:
1337 return err;
1338}
1339
1340static void ipgre_exit_net(struct net *net)
1341{
1342 struct ipgre_net *ign;
1343
1344 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001345 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001346 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001347 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001348 kfree(ign);
1349}
1350
1351static struct pernet_operations ipgre_net_ops = {
1352 .init = ipgre_init_net,
1353 .exit = ipgre_exit_net,
1354};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355
Herbert Xuc19e6542008-10-09 11:59:55 -07001356static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357{
1358 __be16 flags;
1359
1360 if (!data)
1361 return 0;
1362
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1370
1371 return 0;
1372}
1373
Herbert Xue1a80002008-10-09 12:00:17 -07001374static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375{
1376 __be32 daddr;
1377
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1383 }
1384
1385 if (!data)
1386 goto out;
1387
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1392 }
1393
1394out:
1395 return ipgre_tunnel_validate(tb, data);
1396}
1397
Herbert Xuc19e6542008-10-09 11:59:55 -07001398static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms)
1400{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001401 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001402
1403 parms->iph.protocol = IPPROTO_GRE;
1404
1405 if (!data)
1406 return;
1407
1408 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410
1411 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413
1414 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416
1417 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419
1420 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422
1423 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001425
1426 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001428
1429 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431
1432 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF);
1437}
1438
Herbert Xue1a80002008-10-09 12:00:17 -07001439static int ipgre_tap_init(struct net_device *dev)
1440{
1441 struct ip_tunnel *tunnel;
1442
1443 tunnel = netdev_priv(dev);
1444
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1447
1448 ipgre_tunnel_bind_dev(dev);
1449
1450 return 0;
1451}
1452
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001453static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1460};
1461
Herbert Xue1a80002008-10-09 12:00:17 -07001462static void ipgre_tap_setup(struct net_device *dev)
1463{
1464
1465 ether_setup(dev);
1466
Herbert Xu2e9526b2009-10-30 05:51:48 +00001467 dev->netdev_ops = &ipgre_tap_netdev_ops;
Herbert Xue1a80002008-10-09 12:00:17 -07001468 dev->destructor = free_netdev;
Herbert Xue1a80002008-10-09 12:00:17 -07001469
1470 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL;
1472}
1473
Herbert Xuc19e6542008-10-09 11:59:55 -07001474static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[])
1476{
1477 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu;
1481 int err;
1482
1483 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms);
1485
Herbert Xue1a80002008-10-09 12:00:17 -07001486 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001487 return -EEXIST;
1488
Herbert Xue1a80002008-10-09 12:00:17 -07001489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr);
1491
Herbert Xuc19e6542008-10-09 11:59:55 -07001492 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu;
1495
1496 err = register_netdevice(dev);
1497 if (err)
1498 goto out;
1499
1500 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt);
1502
1503out:
1504 return err;
1505}
1506
1507static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[])
1509{
1510 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p;
1514 int mtu;
1515
1516 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL;
1518
1519 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p);
1521
1522 t = ipgre_tunnel_locate(net, &p, 0);
1523
1524 if (t) {
1525 if (t->dev != dev)
1526 return -EEXIST;
1527 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001528 t = nt;
1529
Herbert Xu2e9526b2009-10-30 05:51:48 +00001530 if (dev->type != ARPHRD_ETHER) {
1531 unsigned nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001532
Herbert Xu2e9526b2009-10-30 05:51:48 +00001533 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT;
1537
1538 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL;
1541 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001542
1543 ipgre_tunnel_unlink(ign, t);
1544 t->parms.iph.saddr = p.iph.saddr;
1545 t->parms.iph.daddr = p.iph.daddr;
1546 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001547 if (dev->type != ARPHRD_ETHER) {
1548 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549 memcpy(dev->broadcast, &p.iph.daddr, 4);
1550 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001551 ipgre_tunnel_link(ign, t);
1552 netdev_state_change(dev);
1553 }
1554
1555 t->parms.o_key = p.o_key;
1556 t->parms.iph.ttl = p.iph.ttl;
1557 t->parms.iph.tos = p.iph.tos;
1558 t->parms.iph.frag_off = p.iph.frag_off;
1559
1560 if (t->parms.link != p.link) {
1561 t->parms.link = p.link;
1562 mtu = ipgre_tunnel_bind_dev(dev);
1563 if (!tb[IFLA_MTU])
1564 dev->mtu = mtu;
1565 netdev_state_change(dev);
1566 }
1567
1568 return 0;
1569}
1570
1571static size_t ipgre_get_size(const struct net_device *dev)
1572{
1573 return
1574 /* IFLA_GRE_LINK */
1575 nla_total_size(4) +
1576 /* IFLA_GRE_IFLAGS */
1577 nla_total_size(2) +
1578 /* IFLA_GRE_OFLAGS */
1579 nla_total_size(2) +
1580 /* IFLA_GRE_IKEY */
1581 nla_total_size(4) +
1582 /* IFLA_GRE_OKEY */
1583 nla_total_size(4) +
1584 /* IFLA_GRE_LOCAL */
1585 nla_total_size(4) +
1586 /* IFLA_GRE_REMOTE */
1587 nla_total_size(4) +
1588 /* IFLA_GRE_TTL */
1589 nla_total_size(1) +
1590 /* IFLA_GRE_TOS */
1591 nla_total_size(1) +
1592 /* IFLA_GRE_PMTUDISC */
1593 nla_total_size(1) +
1594 0;
1595}
1596
1597static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598{
1599 struct ip_tunnel *t = netdev_priv(dev);
1600 struct ip_tunnel_parm *p = &t->parms;
1601
1602 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
Patrick McHardyba9e64b2008-10-10 12:10:30 -07001605 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001607 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
Herbert Xuc19e6542008-10-09 11:59:55 -07001609 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612
1613 return 0;
1614
1615nla_put_failure:
1616 return -EMSGSIZE;
1617}
1618
1619static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1621 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1623 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001625 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001627 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1628 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1629 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1630};
1631
1632static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633 .kind = "gre",
1634 .maxtype = IFLA_GRE_MAX,
1635 .policy = ipgre_policy,
1636 .priv_size = sizeof(struct ip_tunnel),
1637 .setup = ipgre_tunnel_setup,
1638 .validate = ipgre_tunnel_validate,
1639 .newlink = ipgre_newlink,
1640 .changelink = ipgre_changelink,
1641 .get_size = ipgre_get_size,
1642 .fill_info = ipgre_fill_info,
1643};
1644
Herbert Xue1a80002008-10-09 12:00:17 -07001645static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646 .kind = "gretap",
1647 .maxtype = IFLA_GRE_MAX,
1648 .policy = ipgre_policy,
1649 .priv_size = sizeof(struct ip_tunnel),
1650 .setup = ipgre_tap_setup,
1651 .validate = ipgre_tap_validate,
1652 .newlink = ipgre_newlink,
1653 .changelink = ipgre_changelink,
1654 .get_size = ipgre_get_size,
1655 .fill_info = ipgre_fill_info,
1656};
1657
/*
 *	And now the module code and kernel interface.
 */
1661
1662static int __init ipgre_init(void)
1663{
1664 int err;
1665
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667
1668 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1669 printk(KERN_INFO "ipgre init: can't add protocol\n");
1670 return -EAGAIN;
1671 }
1672
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001673 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1674 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001675 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001676
Herbert Xuc19e6542008-10-09 11:59:55 -07001677 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0)
1679 goto rtnl_link_failed;
1680
Herbert Xue1a80002008-10-09 12:00:17 -07001681 err = rtnl_link_register(&ipgre_tap_ops);
1682 if (err < 0)
1683 goto tap_ops_failed;
1684
Herbert Xuc19e6542008-10-09 11:59:55 -07001685out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001687
Herbert Xue1a80002008-10-09 12:00:17 -07001688tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001690rtnl_link_failed:
1691 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1692gen_device_failed:
1693 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1694 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695}
1696
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001697static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698{
Herbert Xue1a80002008-10-09 12:00:17 -07001699 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001700 rtnl_link_unregister(&ipgre_link_ops);
1701 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704}
1705
1706module_init(ipgre_init);
1707module_exit(ipgre_fini);
1708MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001709MODULE_ALIAS_RTNL_LINK("gre");
1710MODULE_ALIAS_RTNL_LINK("gretap");