blob: 89ff9d5b1500cf90c7b8bfbe801edec8ac303549 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
Eric Dumazeta43912a2009-09-23 10:28:33 +000069 Current solution: HARD_TX_LOCK lock breaks dead loops.
Linus Torvalds1da177e2005-04-16 15:20:36 -070070
71
72
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
77
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
86
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
89
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
93
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
106
107
108
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
115
116 Alexey Kuznetsov.
117 */
118
Herbert Xuc19e6542008-10-09 11:59:55 -0700119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120static int ipgre_tunnel_init(struct net_device *dev);
121static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700122static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123
124/* Fallback tunnel: no source, no destination, no key, no options */
125
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700126#define HASH_SIZE 16
127
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700128static int ipgre_net_id;
129struct ipgre_net {
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130 struct ip_tunnel *tunnels[4][HASH_SIZE];
131
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700132 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700133};
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/* Tunnel hash table */
136
137/*
138 4 hash tables:
139
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
144
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
148
   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
151 */
152
/*
 * Fold a 32-bit address or key into a bucket index in the range 0..15
 * by XOR-ing the value with itself shifted right by four bits and keeping
 * the low nibble.
 *
 * The argument is parenthesized so that compound expressions such as
 * HASH(a | b) hash the whole expression.  Note that @addr is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154
/*
 * Shorthand names for the four hash tables inside struct ipgre_net,
 * named after the endpoint addresses each table is keyed on.
 */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
160static DEFINE_RWLOCK(ipgre_lock);
161
162/* Given src, dst and key, find appropriate for input tunnel. */
163
Timo Teras749c10f2009-01-19 17:22:12 -0800164static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700165 __be32 remote, __be32 local,
166 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167{
Timo Teras749c10f2009-01-19 17:22:12 -0800168 struct net *net = dev_net(dev);
169 int link = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 unsigned h0 = HASH(remote);
171 unsigned h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800172 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700173 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700174 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
175 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800176 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800179 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key ||
182 !(t->dev->flags & IFF_UP))
183 continue;
184
185 if (t->dev->type != ARPHRD_IPGRE &&
186 t->dev->type != dev_type)
187 continue;
188
Timo Terasafcf1242009-01-26 20:56:10 -0800189 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800190 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800191 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800192 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800193 score |= 2;
194 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800195 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800196
197 if (score < cand_score) {
198 cand = t;
199 cand_score = score;
200 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 }
Herbert Xue1a80002008-10-09 12:00:17 -0700202
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
208
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
212
Timo Terasafcf1242009-01-26 20:56:10 -0800213 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800214 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800215 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800216 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800217 score |= 2;
218 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800219 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800220
221 if (score < cand_score) {
222 cand = t;
223 cand_score = score;
224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 }
Herbert Xue1a80002008-10-09 12:00:17 -0700226
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700227 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800228 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) ||
231 key != t->parms.i_key ||
232 !(t->dev->flags & IFF_UP))
233 continue;
234
235 if (t->dev->type != ARPHRD_IPGRE &&
236 t->dev->type != dev_type)
237 continue;
238
Timo Terasafcf1242009-01-26 20:56:10 -0800239 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800240 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800241 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800242 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800243 score |= 2;
244 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800245 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800246
247 if (score < cand_score) {
248 cand = t;
249 cand_score = score;
250 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 }
Herbert Xue1a80002008-10-09 12:00:17 -0700252
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700253 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Timo Teras749c10f2009-01-19 17:22:12 -0800254 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP))
256 continue;
257
258 if (t->dev->type != ARPHRD_IPGRE &&
259 t->dev->type != dev_type)
260 continue;
261
Timo Terasafcf1242009-01-26 20:56:10 -0800262 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800263 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800264 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800265 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800266 score |= 2;
267 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800268 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800269
270 if (score < cand_score) {
271 cand = t;
272 cand_score = score;
273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 }
275
Timo Terasafcf1242009-01-26 20:56:10 -0800276 if (cand != NULL)
277 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700278
Timo Teras749c10f2009-01-19 17:22:12 -0800279 if (ign->fb_tunnel_dev->flags & IFF_UP)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700280 return netdev_priv(ign->fb_tunnel_dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800281
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return NULL;
283}
284
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700285static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
286 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900288 __be32 remote = parms->iph.daddr;
289 __be32 local = parms->iph.saddr;
290 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 unsigned h = HASH(key);
292 int prio = 0;
293
294 if (local)
295 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800296 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 prio |= 2;
298 h ^= HASH(remote);
299 }
300
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700301 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302}
303
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700304static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
305 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900306{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700307 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900308}
309
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700310static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700312 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314 t->next = *tp;
315 write_lock_bh(&ipgre_lock);
316 *tp = t;
317 write_unlock_bh(&ipgre_lock);
318}
319
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700320static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321{
322 struct ip_tunnel **tp;
323
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 if (t == *tp) {
326 write_lock_bh(&ipgre_lock);
327 *tp = t->next;
328 write_unlock_bh(&ipgre_lock);
329 break;
330 }
331 }
332}
333
Herbert Xue1a80002008-10-09 12:00:17 -0700334static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
335 struct ip_tunnel_parm *parms,
336 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
Al Virod5a0a1e2006-11-08 00:23:14 -0800338 __be32 remote = parms->iph.daddr;
339 __be32 local = parms->iph.saddr;
340 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800341 int link = parms->link;
Herbert Xue1a80002008-10-09 12:00:17 -0700342 struct ip_tunnel *t, **tp;
343 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
344
345 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
346 if (local == t->parms.iph.saddr &&
347 remote == t->parms.iph.daddr &&
348 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800349 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700350 type == t->dev->type)
351 break;
352
353 return t;
354}
355
356static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
357 struct ip_tunnel_parm *parms, int create)
358{
359 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700362 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
Herbert Xue1a80002008-10-09 12:00:17 -0700364 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
365 if (t || !create)
366 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
368 if (parms->name[0])
369 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800370 else
371 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
373 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
374 if (!dev)
375 return NULL;
376
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700377 dev_net_set(dev, net);
378
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800379 if (strchr(name, '%')) {
380 if (dev_alloc_name(dev, name) < 0)
381 goto failed_free;
382 }
383
Patrick McHardy2941a482006-01-08 22:05:26 -0800384 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700386 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
Herbert Xu42aa9162008-10-09 11:59:32 -0700388 dev->mtu = ipgre_tunnel_bind_dev(dev);
389
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800390 if (register_netdevice(dev) < 0)
391 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700394 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 return nt;
396
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800397failed_free:
398 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399 return NULL;
400}
401
402static void ipgre_tunnel_uninit(struct net_device *dev)
403{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700404 struct net *net = dev_net(dev);
405 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
406
407 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 dev_put(dev);
409}
410
411
412static void ipgre_err(struct sk_buff *skb, u32 info)
413{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
Rami Rosen071f92d2008-05-21 17:47:54 -0700415/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 8 bytes of packet payload. It means, that precise relaying of
417 ICMP in the real Internet is absolutely infeasible.
418
419 Moreover, Cisco "wise men" put GRE key to the third word
420 in GRE header. It makes impossible maintaining even soft state for keyed
421 GRE tunnels with enabled checksum. Tell them "thank you".
422
423 Well, I wonder, rfc1812 was written by Cisco employee,
424 what the hell these idiots break standrads established
425 by themself???
426 */
427
Jianjun Kong6ed25332008-11-03 00:25:16 -0800428 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800429 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300431 const int type = icmp_hdr(skb)->type;
432 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800434 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
436 flags = p[0];
437 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
438 if (flags&(GRE_VERSION|GRE_ROUTING))
439 return;
440 if (flags&GRE_KEY) {
441 grehlen += 4;
442 if (flags&GRE_CSUM)
443 grehlen += 4;
444 }
445 }
446
447 /* If only 8 bytes returned, keyed message will be dropped here */
448 if (skb_headlen(skb) < grehlen)
449 return;
450
451 switch (type) {
452 default:
453 case ICMP_PARAMETERPROB:
454 return;
455
456 case ICMP_DEST_UNREACH:
457 switch (code) {
458 case ICMP_SR_FAILED:
459 case ICMP_PORT_UNREACH:
460 /* Impossible event. */
461 return;
462 case ICMP_FRAG_NEEDED:
463 /* Soft state for pmtu is maintained by IP core. */
464 return;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe they are just ether pollution. --ANK
469 */
470 break;
471 }
472 break;
473 case ICMP_TIME_EXCEEDED:
474 if (code != ICMP_EXC_TTL)
475 return;
476 break;
477 }
478
479 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700481 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
483 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800484 if (t == NULL || t->parms.iph.daddr == 0 ||
485 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 goto out;
487
488 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
489 goto out;
490
Wei Yongjunda6185d82009-02-24 23:34:48 -0800491 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 t->err_count++;
493 else
494 t->err_count = 1;
495 t->err_time = jiffies;
496out:
497 read_unlock(&ipgre_lock);
498 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499}
500
501static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
502{
503 if (INET_ECN_is_ce(iph->tos)) {
504 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700505 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700507 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 }
509 }
510}
511
512static inline u8
513ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
514{
515 u8 inner = 0;
516 if (skb->protocol == htons(ETH_P_IP))
517 inner = old_iph->tos;
518 else if (skb->protocol == htons(ETH_P_IPV6))
519 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
520 return INET_ECN_encapsulate(tos, inner);
521}
522
523static int ipgre_rcv(struct sk_buff *skb)
524{
525 struct iphdr *iph;
526 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800527 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800528 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800529 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 u32 seqno = 0;
531 struct ip_tunnel *tunnel;
532 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700533 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700534 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535
536 if (!pskb_may_pull(skb, 16))
537 goto drop_nolock;
538
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700539 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800541 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
543 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
544 /* - Version must be 0.
545 - We do not support routing headers.
546 */
547 if (flags&(GRE_VERSION|GRE_ROUTING))
548 goto drop_nolock;
549
550 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800551 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700552 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800553 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800554 if (!csum)
555 break;
556 /* fall through */
557 case CHECKSUM_NONE:
558 skb->csum = 0;
559 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700560 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 }
562 offset += 4;
563 }
564 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800565 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 offset += 4;
567 }
568 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800569 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 offset += 4;
571 }
572 }
573
Herbert Xue1a80002008-10-09 12:00:17 -0700574 gre_proto = *(__be16 *)(h + 2);
575
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 read_lock(&ipgre_lock);
Timo Teras749c10f2009-01-19 17:22:12 -0800577 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
Herbert Xue1a80002008-10-09 12:00:17 -0700578 iph->saddr, iph->daddr, key,
579 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700580 struct net_device_stats *stats = &tunnel->dev->stats;
581
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 secpath_reset(skb);
583
Herbert Xue1a80002008-10-09 12:00:17 -0700584 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 /* WCCP version 1 and 2 protocol decoding.
586 * - Change protocol to IP
587 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
588 */
Herbert Xue1a80002008-10-09 12:00:17 -0700589 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700590 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900591 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 offset += 4;
593 }
594
Timo Teras1d069162007-12-20 00:10:33 -0800595 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300596 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700597 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 skb->pkt_type = PACKET_HOST;
599#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800600 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 /* Looped back packet, drop it! */
Eric Dumazet511c3f92009-06-02 05:14:27 +0000602 if (skb_rtable(skb)->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700604 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 skb->pkt_type = PACKET_BROADCAST;
606 }
607#endif
608
609 if (((flags&GRE_CSUM) && csum) ||
610 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700611 stats->rx_crc_errors++;
612 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 goto drop;
614 }
615 if (tunnel->parms.i_flags&GRE_SEQ) {
616 if (!(flags&GRE_SEQ) ||
617 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700618 stats->rx_fifo_errors++;
619 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 goto drop;
621 }
622 tunnel->i_seqno = seqno + 1;
623 }
Herbert Xue1a80002008-10-09 12:00:17 -0700624
Herbert Xu64194c32008-10-09 12:03:17 -0700625 len = skb->len;
626
Herbert Xue1a80002008-10-09 12:00:17 -0700627 /* Warning: All skb pointers will be invalidated! */
628 if (tunnel->dev->type == ARPHRD_ETHER) {
629 if (!pskb_may_pull(skb, ETH_HLEN)) {
630 stats->rx_length_errors++;
631 stats->rx_errors++;
632 goto drop;
633 }
634
635 iph = ip_hdr(skb);
636 skb->protocol = eth_type_trans(skb, tunnel->dev);
637 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
638 }
639
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700640 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700641 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 skb->dev = tunnel->dev;
Eric Dumazetadf30902009-06-02 05:19:30 +0000643 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700645
646 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700648
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 netif_rx(skb);
650 read_unlock(&ipgre_lock);
651 return(0);
652 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
655drop:
656 read_unlock(&ipgre_lock);
657drop_nolock:
658 kfree_skb(skb);
659 return(0);
660}
661
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663{
Patrick McHardy2941a482006-01-08 22:05:26 -0800664 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazet0bfbedb2009-10-05 00:11:22 -0700665 struct net_device_stats *stats = &dev->stats;
666 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700667 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 struct iphdr *tiph;
669 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800670 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 struct rtable *rt; /* Route to the other host */
672 struct net_device *tdev; /* Device to other host */
673 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700674 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800676 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 int mtu;
678
Herbert Xue1a80002008-10-09 12:00:17 -0700679 if (dev->type == ARPHRD_ETHER)
680 IPCB(skb)->flags = 0;
681
682 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 gre_hlen = 0;
Jianjun Kong6ed25332008-11-03 00:25:16 -0800684 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 } else {
686 gre_hlen = tunnel->hlen;
687 tiph = &tunnel->parms.iph;
688 }
689
690 if ((dst = tiph->daddr) == 0) {
691 /* NBMA tunnel */
692
Eric Dumazetadf30902009-06-02 05:19:30 +0000693 if (skb_dst(skb) == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700694 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 goto tx_error;
696 }
697
698 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000699 rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 if ((dst = rt->rt_gateway) == 0)
701 goto tx_error_icmp;
702 }
703#ifdef CONFIG_IPV6
704 else if (skb->protocol == htons(ETH_P_IPV6)) {
705 struct in6_addr *addr6;
706 int addr_type;
Eric Dumazetadf30902009-06-02 05:19:30 +0000707 struct neighbour *neigh = skb_dst(skb)->neighbour;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
709 if (neigh == NULL)
710 goto tx_error;
711
Jianjun Kong6ed25332008-11-03 00:25:16 -0800712 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 addr_type = ipv6_addr_type(addr6);
714
715 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700716 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 addr_type = ipv6_addr_type(addr6);
718 }
719
720 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
721 goto tx_error_icmp;
722
723 dst = addr6->s6_addr32[3];
724 }
725#endif
726 else
727 goto tx_error;
728 }
729
730 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700731 if (tos == 1) {
732 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 if (skb->protocol == htons(ETH_P_IP))
734 tos = old_iph->tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 }
736
737 {
738 struct flowi fl = { .oif = tunnel->parms.link,
739 .nl_u = { .ip4_u =
740 { .daddr = dst,
741 .saddr = tiph->saddr,
742 .tos = RT_TOS(tos) } },
743 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700744 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700745 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 goto tx_error;
747 }
748 }
749 tdev = rt->u.dst.dev;
750
751 if (tdev == dev) {
752 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700753 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 goto tx_error;
755 }
756
757 df = tiph->frag_off;
758 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700759 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000761 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
Eric Dumazetadf30902009-06-02 05:19:30 +0000763 if (skb_dst(skb))
764 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
766 if (skb->protocol == htons(ETH_P_IP)) {
767 df |= (old_iph->frag_off&htons(IP_DF));
768
769 if ((old_iph->frag_off&htons(IP_DF)) &&
770 mtu < ntohs(old_iph->tot_len)) {
771 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
772 ip_rt_put(rt);
773 goto tx_error;
774 }
775 }
776#ifdef CONFIG_IPV6
777 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000778 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
Eric Dumazetadf30902009-06-02 05:19:30 +0000780 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800781 if ((tunnel->parms.iph.daddr &&
782 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 rt6->rt6i_dst.plen == 128) {
784 rt6->rt6i_flags |= RTF_MODIFIED;
Eric Dumazetadf30902009-06-02 05:19:30 +0000785 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 }
787 }
788
789 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
790 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
791 ip_rt_put(rt);
792 goto tx_error;
793 }
794 }
795#endif
796
797 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800798 if (time_before(jiffies,
799 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 tunnel->err_count--;
801
802 dst_link_failure(skb);
803 } else
804 tunnel->err_count = 0;
805 }
806
807 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
808
Patrick McHardycfbba492007-07-09 15:33:40 -0700809 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
810 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
812 if (!new_skb) {
813 ip_rt_put(rt);
Eric Dumazet0bfbedb2009-10-05 00:11:22 -0700814 txq->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000816 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 }
818 if (skb->sk)
819 skb_set_owner_w(new_skb, skb->sk);
820 dev_kfree_skb(skb);
821 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700822 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 }
824
Herbert Xu64194c32008-10-09 12:03:17 -0700825 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700826 skb_push(skb, gre_hlen);
827 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800829 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
830 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000831 skb_dst_drop(skb);
832 skb_dst_set(skb, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
834 /*
835 * Push down and install the IPIP header.
836 */
837
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700838 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 iph->version = 4;
840 iph->ihl = sizeof(struct iphdr) >> 2;
841 iph->frag_off = df;
842 iph->protocol = IPPROTO_GRE;
843 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
844 iph->daddr = rt->rt_dst;
845 iph->saddr = rt->rt_src;
846
847 if ((iph->ttl = tiph->ttl) == 0) {
848 if (skb->protocol == htons(ETH_P_IP))
849 iph->ttl = old_iph->ttl;
850#ifdef CONFIG_IPV6
851 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed25332008-11-03 00:25:16 -0800852 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853#endif
854 else
855 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
856 }
857
Herbert Xue1a80002008-10-09 12:00:17 -0700858 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
859 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
860 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861
862 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800863 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864
865 if (tunnel->parms.o_flags&GRE_SEQ) {
866 ++tunnel->o_seqno;
867 *ptr = htonl(tunnel->o_seqno);
868 ptr--;
869 }
870 if (tunnel->parms.o_flags&GRE_KEY) {
871 *ptr = tunnel->parms.o_key;
872 ptr--;
873 }
874 if (tunnel->parms.o_flags&GRE_CSUM) {
875 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800876 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 }
878 }
879
880 nf_reset(skb);
881
882 IPTUNNEL_XMIT();
Patrick McHardy6ed10652009-06-23 06:03:08 +0000883 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
885tx_error_icmp:
886 dst_link_failure(skb);
887
888tx_error:
889 stats->tx_errors++;
890 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000891 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892}
893
Herbert Xu42aa9162008-10-09 11:59:32 -0700894static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800895{
896 struct net_device *tdev = NULL;
897 struct ip_tunnel *tunnel;
898 struct iphdr *iph;
899 int hlen = LL_MAX_HEADER;
900 int mtu = ETH_DATA_LEN;
901 int addend = sizeof(struct iphdr) + 4;
902
903 tunnel = netdev_priv(dev);
904 iph = &tunnel->parms.iph;
905
Herbert Xuc95b8192008-10-09 11:58:54 -0700906 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800907
908 if (iph->daddr) {
909 struct flowi fl = { .oif = tunnel->parms.link,
910 .nl_u = { .ip4_u =
911 { .daddr = iph->daddr,
912 .saddr = iph->saddr,
913 .tos = RT_TOS(iph->tos) } },
914 .proto = IPPROTO_GRE };
915 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700916 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800917 tdev = rt->u.dst.dev;
918 ip_rt_put(rt);
919 }
Herbert Xue1a80002008-10-09 12:00:17 -0700920
921 if (dev->type != ARPHRD_ETHER)
922 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800923 }
924
925 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700926 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800927
928 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700929 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800930 mtu = tdev->mtu;
931 }
932 dev->iflink = tunnel->parms.link;
933
934 /* Precalculate GRE options length */
935 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
936 if (tunnel->parms.o_flags&GRE_CSUM)
937 addend += 4;
938 if (tunnel->parms.o_flags&GRE_KEY)
939 addend += 4;
940 if (tunnel->parms.o_flags&GRE_SEQ)
941 addend += 4;
942 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700943 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -0700944 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -0700945
946 if (mtu < 68)
947 mtu = 68;
948
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800949 tunnel->hlen = addend;
950
Herbert Xu42aa9162008-10-09 11:59:32 -0700951 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800952}
953
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954static int
955ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
956{
957 int err = 0;
958 struct ip_tunnel_parm p;
959 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700960 struct net *net = dev_net(dev);
961 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962
963 switch (cmd) {
964 case SIOCGETTUNNEL:
965 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700966 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
968 err = -EFAULT;
969 break;
970 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700971 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 }
973 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800974 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 memcpy(&p, &t->parms, sizeof(p));
976 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
977 err = -EFAULT;
978 break;
979
980 case SIOCADDTUNNEL:
981 case SIOCCHGTUNNEL:
982 err = -EPERM;
983 if (!capable(CAP_NET_ADMIN))
984 goto done;
985
986 err = -EFAULT;
987 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
988 goto done;
989
990 err = -EINVAL;
991 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
992 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
993 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
994 goto done;
995 if (p.iph.ttl)
996 p.iph.frag_off |= htons(IP_DF);
997
998 if (!(p.i_flags&GRE_KEY))
999 p.i_key = 0;
1000 if (!(p.o_flags&GRE_KEY))
1001 p.o_key = 0;
1002
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001003 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001005 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 if (t != NULL) {
1007 if (t->dev != dev) {
1008 err = -EEXIST;
1009 break;
1010 }
1011 } else {
Jianjun Kong6ed25332008-11-03 00:25:16 -08001012 unsigned nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013
Patrick McHardy2941a482006-01-08 22:05:26 -08001014 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015
Joe Perchesf97c1e02007-12-16 13:45:43 -08001016 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 nflags = IFF_BROADCAST;
1018 else if (p.iph.daddr)
1019 nflags = IFF_POINTOPOINT;
1020
1021 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1022 err = -EINVAL;
1023 break;
1024 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001025 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 t->parms.iph.saddr = p.iph.saddr;
1027 t->parms.iph.daddr = p.iph.daddr;
1028 t->parms.i_key = p.i_key;
1029 t->parms.o_key = p.o_key;
1030 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1031 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001032 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 netdev_state_change(dev);
1034 }
1035 }
1036
1037 if (t) {
1038 err = 0;
1039 if (cmd == SIOCCHGTUNNEL) {
1040 t->parms.iph.ttl = p.iph.ttl;
1041 t->parms.iph.tos = p.iph.tos;
1042 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001043 if (t->parms.link != p.link) {
1044 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001045 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001046 netdev_state_change(dev);
1047 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 }
1049 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1050 err = -EFAULT;
1051 } else
1052 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1053 break;
1054
1055 case SIOCDELTUNNEL:
1056 err = -EPERM;
1057 if (!capable(CAP_NET_ADMIN))
1058 goto done;
1059
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001060 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 err = -EFAULT;
1062 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1063 goto done;
1064 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001065 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 goto done;
1067 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001068 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069 goto done;
1070 dev = t->dev;
1071 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001072 unregister_netdevice(dev);
1073 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074 break;
1075
1076 default:
1077 err = -EINVAL;
1078 }
1079
1080done:
1081 return err;
1082}
1083
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1085{
Patrick McHardy2941a482006-01-08 22:05:26 -08001086 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001087 if (new_mtu < 68 ||
1088 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 return -EINVAL;
1090 dev->mtu = new_mtu;
1091 return 0;
1092}
1093
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094/* Nice toy. Unfortunately, useless in real life :-)
1095 It allows to construct virtual multiprotocol broadcast "LAN"
1096 over the Internet, provided multicast routing is tuned.
1097
1098
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001103
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1105 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1106
1107 ping -t 255 224.66.66.66
1108
1109 If nobody answers, mbone does not work.
1110
1111 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1112 ip addr add 10.66.66.<somewhat>/24 dev Universe
1113 ifconfig Universe up
1114 ifconfig Universe add fe80::<Your_real_addr>/10
1115 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1116 ftp 10.66.66.66
1117 ...
1118 ftp fec0:6666:6666::193.233.7.65
1119 ...
1120
1121 */
1122
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001123static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1124 unsigned short type,
1125 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126{
Patrick McHardy2941a482006-01-08 22:05:26 -08001127 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001129 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1132 p[0] = t->parms.o_flags;
1133 p[1] = htons(type);
1134
1135 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001136 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001138
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 if (saddr)
1140 memcpy(&iph->saddr, saddr, 4);
1141
1142 if (daddr) {
1143 memcpy(&iph->daddr, daddr, 4);
1144 return t->hlen;
1145 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001146 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001148
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 return -t->hlen;
1150}
1151
Timo Teras6a5f44d2007-10-23 20:31:53 -07001152static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153{
Jianjun Kong6ed25332008-11-03 00:25:16 -08001154 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001155 memcpy(haddr, &iph->saddr, 4);
1156 return 4;
1157}
1158
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001159static const struct header_ops ipgre_header_ops = {
1160 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001161 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001162};
1163
Timo Teras6a5f44d2007-10-23 20:31:53 -07001164#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165static int ipgre_open(struct net_device *dev)
1166{
Patrick McHardy2941a482006-01-08 22:05:26 -08001167 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
Joe Perchesf97c1e02007-12-16 13:45:43 -08001169 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 struct flowi fl = { .oif = t->parms.link,
1171 .nl_u = { .ip4_u =
1172 { .daddr = t->parms.iph.daddr,
1173 .saddr = t->parms.iph.saddr,
1174 .tos = RT_TOS(t->parms.iph.tos) } },
1175 .proto = IPPROTO_GRE };
1176 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001177 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 return -EADDRNOTAVAIL;
1179 dev = rt->u.dst.dev;
1180 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001181 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 return -EADDRNOTAVAIL;
1183 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001184 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 }
1186 return 0;
1187}
1188
1189static int ipgre_close(struct net_device *dev)
1190{
Patrick McHardy2941a482006-01-08 22:05:26 -08001191 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001192
Joe Perchesf97c1e02007-12-16 13:45:43 -08001193 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001194 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001195 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 if (in_dev) {
1197 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198 in_dev_put(in_dev);
1199 }
1200 }
1201 return 0;
1202}
1203
1204#endif
1205
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001206static const struct net_device_ops ipgre_netdev_ops = {
1207 .ndo_init = ipgre_tunnel_init,
1208 .ndo_uninit = ipgre_tunnel_uninit,
1209#ifdef CONFIG_NET_IPGRE_BROADCAST
1210 .ndo_open = ipgre_open,
1211 .ndo_stop = ipgre_close,
1212#endif
1213 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1216};
1217
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218static void ipgre_tunnel_setup(struct net_device *dev)
1219{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001220 dev->netdev_ops = &ipgre_netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 dev->destructor = free_netdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222
1223 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001225 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 dev->flags = IFF_NOARP;
1227 dev->iflink = 0;
1228 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001229 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001230 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231}
1232
1233static int ipgre_tunnel_init(struct net_device *dev)
1234{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 struct ip_tunnel *tunnel;
1236 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
Patrick McHardy2941a482006-01-08 22:05:26 -08001238 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 iph = &tunnel->parms.iph;
1240
1241 tunnel->dev = dev;
1242 strcpy(tunnel->parms.name, dev->name);
1243
1244 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1245 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1246
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001249 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 if (!iph->saddr)
1251 return -EINVAL;
1252 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001253 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 }
1255#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001256 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001257 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 return 0;
1260}
1261
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001262static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263{
Patrick McHardy2941a482006-01-08 22:05:26 -08001264 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001266 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267
1268 tunnel->dev = dev;
1269 strcpy(tunnel->parms.name, dev->name);
1270
1271 iph->version = 4;
1272 iph->protocol = IPPROTO_GRE;
1273 iph->ihl = 5;
1274 tunnel->hlen = sizeof(struct iphdr) + 4;
1275
1276 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001277 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278}
1279
1280
Alexey Dobriyan32613092009-09-14 12:21:47 +00001281static const struct net_protocol ipgre_protocol = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001284 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285};
1286
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001287static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1288{
1289 int prio;
1290
1291 for (prio = 0; prio < 4; prio++) {
1292 int h;
1293 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t;
1295 while ((t = ign->tunnels[prio][h]) != NULL)
1296 unregister_netdevice(t->dev);
1297 }
1298 }
1299}
1300
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001301static int ipgre_init_net(struct net *net)
1302{
1303 int err;
1304 struct ipgre_net *ign;
1305
1306 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001307 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001308 if (ign == NULL)
1309 goto err_alloc;
1310
1311 err = net_assign_generic(net, ipgre_net_id, ign);
1312 if (err < 0)
1313 goto err_assign;
1314
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001315 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1316 ipgre_tunnel_setup);
1317 if (!ign->fb_tunnel_dev) {
1318 err = -ENOMEM;
1319 goto err_alloc_dev;
1320 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001321 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001322
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001323 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001324 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001325
1326 if ((err = register_netdev(ign->fb_tunnel_dev)))
1327 goto err_reg_dev;
1328
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001329 return 0;
1330
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001331err_reg_dev:
1332 free_netdev(ign->fb_tunnel_dev);
1333err_alloc_dev:
1334 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001335err_assign:
1336 kfree(ign);
1337err_alloc:
1338 return err;
1339}
1340
1341static void ipgre_exit_net(struct net *net)
1342{
1343 struct ipgre_net *ign;
1344
1345 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001346 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001347 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001348 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001349 kfree(ign);
1350}
1351
1352static struct pernet_operations ipgre_net_ops = {
1353 .init = ipgre_init_net,
1354 .exit = ipgre_exit_net,
1355};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356
Herbert Xuc19e6542008-10-09 11:59:55 -07001357static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1358{
1359 __be16 flags;
1360
1361 if (!data)
1362 return 0;
1363
1364 flags = 0;
1365 if (data[IFLA_GRE_IFLAGS])
1366 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1367 if (data[IFLA_GRE_OFLAGS])
1368 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1369 if (flags & (GRE_VERSION|GRE_ROUTING))
1370 return -EINVAL;
1371
1372 return 0;
1373}
1374
Herbert Xue1a80002008-10-09 12:00:17 -07001375static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1376{
1377 __be32 daddr;
1378
1379 if (tb[IFLA_ADDRESS]) {
1380 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1381 return -EINVAL;
1382 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1383 return -EADDRNOTAVAIL;
1384 }
1385
1386 if (!data)
1387 goto out;
1388
1389 if (data[IFLA_GRE_REMOTE]) {
1390 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1391 if (!daddr)
1392 return -EINVAL;
1393 }
1394
1395out:
1396 return ipgre_tunnel_validate(tb, data);
1397}
1398
Herbert Xuc19e6542008-10-09 11:59:55 -07001399static void ipgre_netlink_parms(struct nlattr *data[],
1400 struct ip_tunnel_parm *parms)
1401{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001402 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001403
1404 parms->iph.protocol = IPPROTO_GRE;
1405
1406 if (!data)
1407 return;
1408
1409 if (data[IFLA_GRE_LINK])
1410 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1411
1412 if (data[IFLA_GRE_IFLAGS])
1413 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1414
1415 if (data[IFLA_GRE_OFLAGS])
1416 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1417
1418 if (data[IFLA_GRE_IKEY])
1419 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1420
1421 if (data[IFLA_GRE_OKEY])
1422 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1423
1424 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001425 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001426
1427 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001428 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001429
1430 if (data[IFLA_GRE_TTL])
1431 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1432
1433 if (data[IFLA_GRE_TOS])
1434 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1435
1436 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1437 parms->iph.frag_off = htons(IP_DF);
1438}
1439
Herbert Xue1a80002008-10-09 12:00:17 -07001440static int ipgre_tap_init(struct net_device *dev)
1441{
1442 struct ip_tunnel *tunnel;
1443
1444 tunnel = netdev_priv(dev);
1445
1446 tunnel->dev = dev;
1447 strcpy(tunnel->parms.name, dev->name);
1448
1449 ipgre_tunnel_bind_dev(dev);
1450
1451 return 0;
1452}
1453
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001454static const struct net_device_ops ipgre_tap_netdev_ops = {
1455 .ndo_init = ipgre_tap_init,
1456 .ndo_uninit = ipgre_tunnel_uninit,
1457 .ndo_start_xmit = ipgre_tunnel_xmit,
1458 .ndo_set_mac_address = eth_mac_addr,
1459 .ndo_validate_addr = eth_validate_addr,
1460 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1461};
1462
Herbert Xue1a80002008-10-09 12:00:17 -07001463static void ipgre_tap_setup(struct net_device *dev)
1464{
1465
1466 ether_setup(dev);
1467
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001468 dev->netdev_ops = &ipgre_netdev_ops;
Herbert Xue1a80002008-10-09 12:00:17 -07001469 dev->destructor = free_netdev;
Herbert Xue1a80002008-10-09 12:00:17 -07001470
1471 dev->iflink = 0;
1472 dev->features |= NETIF_F_NETNS_LOCAL;
1473}
1474
Herbert Xuc19e6542008-10-09 11:59:55 -07001475static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1476 struct nlattr *data[])
1477{
1478 struct ip_tunnel *nt;
1479 struct net *net = dev_net(dev);
1480 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1481 int mtu;
1482 int err;
1483
1484 nt = netdev_priv(dev);
1485 ipgre_netlink_parms(data, &nt->parms);
1486
Herbert Xue1a80002008-10-09 12:00:17 -07001487 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001488 return -EEXIST;
1489
Herbert Xue1a80002008-10-09 12:00:17 -07001490 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1491 random_ether_addr(dev->dev_addr);
1492
Herbert Xuc19e6542008-10-09 11:59:55 -07001493 mtu = ipgre_tunnel_bind_dev(dev);
1494 if (!tb[IFLA_MTU])
1495 dev->mtu = mtu;
1496
1497 err = register_netdevice(dev);
1498 if (err)
1499 goto out;
1500
1501 dev_hold(dev);
1502 ipgre_tunnel_link(ign, nt);
1503
1504out:
1505 return err;
1506}
1507
1508static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1509 struct nlattr *data[])
1510{
1511 struct ip_tunnel *t, *nt;
1512 struct net *net = dev_net(dev);
1513 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1514 struct ip_tunnel_parm p;
1515 int mtu;
1516
1517 if (dev == ign->fb_tunnel_dev)
1518 return -EINVAL;
1519
1520 nt = netdev_priv(dev);
1521 ipgre_netlink_parms(data, &p);
1522
1523 t = ipgre_tunnel_locate(net, &p, 0);
1524
1525 if (t) {
1526 if (t->dev != dev)
1527 return -EEXIST;
1528 } else {
1529 unsigned nflags = 0;
1530
1531 t = nt;
1532
1533 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT;
1537
1538 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL;
1541
1542 ipgre_tunnel_unlink(ign, t);
1543 t->parms.iph.saddr = p.iph.saddr;
1544 t->parms.iph.daddr = p.iph.daddr;
1545 t->parms.i_key = p.i_key;
1546 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1547 memcpy(dev->broadcast, &p.iph.daddr, 4);
1548 ipgre_tunnel_link(ign, t);
1549 netdev_state_change(dev);
1550 }
1551
1552 t->parms.o_key = p.o_key;
1553 t->parms.iph.ttl = p.iph.ttl;
1554 t->parms.iph.tos = p.iph.tos;
1555 t->parms.iph.frag_off = p.iph.frag_off;
1556
1557 if (t->parms.link != p.link) {
1558 t->parms.link = p.link;
1559 mtu = ipgre_tunnel_bind_dev(dev);
1560 if (!tb[IFLA_MTU])
1561 dev->mtu = mtu;
1562 netdev_state_change(dev);
1563 }
1564
1565 return 0;
1566}
1567
1568static size_t ipgre_get_size(const struct net_device *dev)
1569{
1570 return
1571 /* IFLA_GRE_LINK */
1572 nla_total_size(4) +
1573 /* IFLA_GRE_IFLAGS */
1574 nla_total_size(2) +
1575 /* IFLA_GRE_OFLAGS */
1576 nla_total_size(2) +
1577 /* IFLA_GRE_IKEY */
1578 nla_total_size(4) +
1579 /* IFLA_GRE_OKEY */
1580 nla_total_size(4) +
1581 /* IFLA_GRE_LOCAL */
1582 nla_total_size(4) +
1583 /* IFLA_GRE_REMOTE */
1584 nla_total_size(4) +
1585 /* IFLA_GRE_TTL */
1586 nla_total_size(1) +
1587 /* IFLA_GRE_TOS */
1588 nla_total_size(1) +
1589 /* IFLA_GRE_PMTUDISC */
1590 nla_total_size(1) +
1591 0;
1592}
1593
1594static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1595{
1596 struct ip_tunnel *t = netdev_priv(dev);
1597 struct ip_tunnel_parm *p = &t->parms;
1598
1599 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1600 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1601 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
Patrick McHardyba9e64b2008-10-10 12:10:30 -07001602 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1603 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001604 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1605 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
Herbert Xuc19e6542008-10-09 11:59:55 -07001606 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1607 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1608 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1609
1610 return 0;
1611
1612nla_put_failure:
1613 return -EMSGSIZE;
1614}
1615
1616static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1617 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1618 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1619 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1620 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1621 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001622 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1623 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001624 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1625 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1626 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1627};
1628
1629static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1630 .kind = "gre",
1631 .maxtype = IFLA_GRE_MAX,
1632 .policy = ipgre_policy,
1633 .priv_size = sizeof(struct ip_tunnel),
1634 .setup = ipgre_tunnel_setup,
1635 .validate = ipgre_tunnel_validate,
1636 .newlink = ipgre_newlink,
1637 .changelink = ipgre_changelink,
1638 .get_size = ipgre_get_size,
1639 .fill_info = ipgre_fill_info,
1640};
1641
Herbert Xue1a80002008-10-09 12:00:17 -07001642static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1643 .kind = "gretap",
1644 .maxtype = IFLA_GRE_MAX,
1645 .policy = ipgre_policy,
1646 .priv_size = sizeof(struct ip_tunnel),
1647 .setup = ipgre_tap_setup,
1648 .validate = ipgre_tap_validate,
1649 .newlink = ipgre_newlink,
1650 .changelink = ipgre_changelink,
1651 .get_size = ipgre_get_size,
1652 .fill_info = ipgre_fill_info,
1653};
1654
/*
 *	And now the module code and kernel interface.
 */
1658
1659static int __init ipgre_init(void)
1660{
1661 int err;
1662
1663 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1664
1665 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1666 printk(KERN_INFO "ipgre init: can't add protocol\n");
1667 return -EAGAIN;
1668 }
1669
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001670 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1671 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001672 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001673
Herbert Xuc19e6542008-10-09 11:59:55 -07001674 err = rtnl_link_register(&ipgre_link_ops);
1675 if (err < 0)
1676 goto rtnl_link_failed;
1677
Herbert Xue1a80002008-10-09 12:00:17 -07001678 err = rtnl_link_register(&ipgre_tap_ops);
1679 if (err < 0)
1680 goto tap_ops_failed;
1681
Herbert Xuc19e6542008-10-09 11:59:55 -07001682out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001684
Herbert Xue1a80002008-10-09 12:00:17 -07001685tap_ops_failed:
1686 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001687rtnl_link_failed:
1688 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1689gen_device_failed:
1690 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1691 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692}
1693
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001694static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695{
Herbert Xue1a80002008-10-09 12:00:17 -07001696 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001697 rtnl_link_unregister(&ipgre_link_ops);
1698 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1700 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701}
1702
1703module_init(ipgre_init);
1704module_exit(ipgre_fini);
1705MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001706MODULE_ALIAS_RTNL_LINK("gre");
1707MODULE_ALIAS_RTNL_LINK("gretap");