blob: 25d2c77a7f38d1f596a11c9f51f4eaabf0938467 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070044#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#ifdef CONFIG_IPV6
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#endif
51
52/*
53 Problems & solutions
54 --------------------
55
   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is infeasible task. The most general solution would be
   to keep skb->encapsulation counter (sort of local ttl),
   and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining new variable in ALL
   skb, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like dev->tbusy flag, but I preferred new variable, because
   the semantics is different. One day, when hard_start_xmit
   will be multithreaded we will have to use skb->encapsulation.
72
73
74
75 2. Networking dead loops would not kill routers, but would really
76 kill network. IP hop limit plays role of "t->recursion" in this case,
77 if we copy it from packet being encapsulated to upper header.
78 It is very good solution, but it introduces two problems:
79
80 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81 do not work over tunnels.
82 - traceroute does not work. I planned to relay ICMP from tunnel,
83 so that this problem would be solved and traceroute output
84 would even more informative. This idea appeared to be wrong:
85 only Linux complies to rfc1812 now (yes, guys, Linux is the only
86 true router now :-)), all routers (at least, in neighbourhood of mine)
87 return only 8 bytes of payload. It is the end.
88
89 Hence, if we want that OSPF worked or traceroute said something reasonable,
90 we should search for another solution.
91
   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
95
96 Current solution: The solution was UNEXPECTEDLY SIMPLE.
97 We force DF flag on tunnels with preconfigured hop limit,
98 that is ALL. :-) Well, it does not remove the problem completely,
99 but exponential growth of network traffic is changed to linear
100 (branches, that exceed pmtu are pruned) and tunnel mtu
101 fastly degrades to value <68, where looping stops.
102 Yes, it is not good if there exists a router in the loop,
103 which does not force DF, even when encapsulating packets have DF set.
104 But it is not our problem! Nobody could accuse us, we made
105 all that we could make. Even if it is your gated who injected
106 fatal route to network, even if it were you who configured
107 fatal static route: you are innocent. :-)
108
109
110
111 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112 practically identical code. It would be good to glue them
113 together, but it is not very evident, how to make them modular.
114 sit is integral part of IPv6, ipip and gre are naturally modular.
115 We could extract common parts (hash table, ioctl etc)
116 to a separate module (ip_tunnel.c).
117
118 Alexey Kuznetsov.
119 */
120
Herbert Xuc19e6542008-10-09 11:59:55 -0700121static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static int ipgre_tunnel_init(struct net_device *dev);
123static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700124static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125
126/* Fallback tunnel: no source, no destination, no key, no options */
127
128static int ipgre_fb_tunnel_init(struct net_device *dev);
129
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700130#define HASH_SIZE 16
131
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700132static int ipgre_net_id;
133struct ipgre_net {
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700134 struct ip_tunnel *tunnels[4][HASH_SIZE];
135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700136 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137};
138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139/* Tunnel hash table */
140
/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
 */
156
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The mask is derived from HASH_SIZE so the two cannot drift apart;
 * HASH_SIZE must therefore stay a power of two.  The argument is
 * parenthesized so that any expression may be passed, but it is
 * evaluated twice and must not have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Readable aliases for the four tables in struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_wc	tunnels[0]	/* (*,*)          */

/*
 * Taken for reading around tunnel lookups and for writing (with BHs
 * disabled) when a tunnel is linked into or unlinked from a chain.
 */
static DEFINE_RWLOCK(ipgre_lock);
165
166/* Given src, dst and key, find appropriate for input tunnel. */
167
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700168static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
169 __be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170{
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700174 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700176 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
178 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
179 return t;
180 }
181 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700182 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 if (remote == t->parms.iph.daddr) {
184 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
185 return t;
186 }
187 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700188 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800190 (local == t->parms.iph.daddr &&
191 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193 return t;
194 }
195 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700196 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
198 return t;
199 }
200
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700201 if (ign->fb_tunnel_dev->flags&IFF_UP)
202 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 return NULL;
204}
205
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700206static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
207 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
211 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 unsigned h = HASH(key);
213 int prio = 0;
214
215 if (local)
216 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800217 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 prio |= 2;
219 h ^= HASH(remote);
220 }
221
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700222 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223}
224
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700225static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
226 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900227{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700228 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900229}
230
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700231static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700233 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234
235 t->next = *tp;
236 write_lock_bh(&ipgre_lock);
237 *tp = t;
238 write_unlock_bh(&ipgre_lock);
239}
240
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700241static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242{
243 struct ip_tunnel **tp;
244
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700245 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 if (t == *tp) {
247 write_lock_bh(&ipgre_lock);
248 *tp = t->next;
249 write_unlock_bh(&ipgre_lock);
250 break;
251 }
252 }
253}
254
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700255static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
256 struct ip_tunnel_parm *parms, int create)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257{
Al Virod5a0a1e2006-11-08 00:23:14 -0800258 __be32 remote = parms->iph.daddr;
259 __be32 local = parms->iph.saddr;
260 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 struct ip_tunnel *t, **tp, *nt;
262 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700264 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700266 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
268 if (key == t->parms.i_key)
269 return t;
270 }
271 }
272 if (!create)
273 return NULL;
274
275 if (parms->name[0])
276 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800277 else
278 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279
280 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
281 if (!dev)
282 return NULL;
283
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700284 dev_net_set(dev, net);
285
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800286 if (strchr(name, '%')) {
287 if (dev_alloc_name(dev, name) < 0)
288 goto failed_free;
289 }
290
Patrick McHardy2941a482006-01-08 22:05:26 -0800291 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700293 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
Herbert Xu42aa9162008-10-09 11:59:32 -0700295 dev->mtu = ipgre_tunnel_bind_dev(dev);
296
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800297 if (register_netdevice(dev) < 0)
298 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700301 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 return nt;
303
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800304failed_free:
305 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 return NULL;
307}
308
309static void ipgre_tunnel_uninit(struct net_device *dev)
310{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700311 struct net *net = dev_net(dev);
312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
313
314 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 dev_put(dev);
316}
317
318
319static void ipgre_err(struct sk_buff *skb, u32 info)
320{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
Rami Rosen071f92d2008-05-21 17:47:54 -0700322/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 8 bytes of packet payload. It means, that precise relaying of
324 ICMP in the real Internet is absolutely infeasible.
325
326 Moreover, Cisco "wise men" put GRE key to the third word
327 in GRE header. It makes impossible maintaining even soft state for keyed
328 GRE tunnels with enabled checksum. Tell them "thank you".
329
330 Well, I wonder, rfc1812 was written by Cisco employee,
331 what the hell these idiots break standrads established
332 by themself???
333 */
334
335 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800336 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300338 const int type = icmp_hdr(skb)->type;
339 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800341 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
343 flags = p[0];
344 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
345 if (flags&(GRE_VERSION|GRE_ROUTING))
346 return;
347 if (flags&GRE_KEY) {
348 grehlen += 4;
349 if (flags&GRE_CSUM)
350 grehlen += 4;
351 }
352 }
353
354 /* If only 8 bytes returned, keyed message will be dropped here */
355 if (skb_headlen(skb) < grehlen)
356 return;
357
358 switch (type) {
359 default:
360 case ICMP_PARAMETERPROB:
361 return;
362
363 case ICMP_DEST_UNREACH:
364 switch (code) {
365 case ICMP_SR_FAILED:
366 case ICMP_PORT_UNREACH:
367 /* Impossible event. */
368 return;
369 case ICMP_FRAG_NEEDED:
370 /* Soft state for pmtu is maintained by IP core. */
371 return;
372 default:
373 /* All others are translated to HOST_UNREACH.
374 rfc2003 contains "deep thoughts" about NET_UNREACH,
375 I believe they are just ether pollution. --ANK
376 */
377 break;
378 }
379 break;
380 case ICMP_TIME_EXCEEDED:
381 if (code != ICMP_EXC_TTL)
382 return;
383 break;
384 }
385
386 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700387 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700388 (flags&GRE_KEY) ?
389 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800390 if (t == NULL || t->parms.iph.daddr == 0 ||
391 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 goto out;
393
394 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
395 goto out;
396
397 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
398 t->err_count++;
399 else
400 t->err_count = 1;
401 t->err_time = jiffies;
402out:
403 read_unlock(&ipgre_lock);
404 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405}
406
407static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
408{
409 if (INET_ECN_is_ce(iph->tos)) {
410 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700411 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700413 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 }
415 }
416}
417
418static inline u8
419ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
420{
421 u8 inner = 0;
422 if (skb->protocol == htons(ETH_P_IP))
423 inner = old_iph->tos;
424 else if (skb->protocol == htons(ETH_P_IPV6))
425 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
426 return INET_ECN_encapsulate(tos, inner);
427}
428
429static int ipgre_rcv(struct sk_buff *skb)
430{
431 struct iphdr *iph;
432 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800433 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800434 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800435 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 u32 seqno = 0;
437 struct ip_tunnel *tunnel;
438 int offset = 4;
439
440 if (!pskb_may_pull(skb, 16))
441 goto drop_nolock;
442
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700443 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800445 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
447 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
448 /* - Version must be 0.
449 - We do not support routing headers.
450 */
451 if (flags&(GRE_VERSION|GRE_ROUTING))
452 goto drop_nolock;
453
454 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800455 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700456 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800457 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800458 if (!csum)
459 break;
460 /* fall through */
461 case CHECKSUM_NONE:
462 skb->csum = 0;
463 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700464 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 }
466 offset += 4;
467 }
468 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800469 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 offset += 4;
471 }
472 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800473 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 offset += 4;
475 }
476 }
477
478 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700479 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700480 iph->saddr, iph->daddr, key)) != NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700481 struct net_device_stats *stats = &tunnel->dev->stats;
482
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 secpath_reset(skb);
484
Al Virod5a0a1e2006-11-08 00:23:14 -0800485 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 /* WCCP version 1 and 2 protocol decoding.
487 * - Change protocol to IP
488 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
489 */
490 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700491 skb->protocol == htons(ETH_P_WCCP)) {
492 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900493 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 offset += 4;
495 }
496
Timo Teras1d069162007-12-20 00:10:33 -0800497 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300498 __pskb_pull(skb, offset);
499 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700500 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 skb->pkt_type = PACKET_HOST;
502#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800503 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800505 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700507 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 skb->pkt_type = PACKET_BROADCAST;
509 }
510#endif
511
512 if (((flags&GRE_CSUM) && csum) ||
513 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700514 stats->rx_crc_errors++;
515 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 goto drop;
517 }
518 if (tunnel->parms.i_flags&GRE_SEQ) {
519 if (!(flags&GRE_SEQ) ||
520 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700521 stats->rx_fifo_errors++;
522 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 goto drop;
524 }
525 tunnel->i_seqno = seqno + 1;
526 }
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700527 stats->rx_packets++;
528 stats->rx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 skb->dev = tunnel->dev;
530 dst_release(skb->dst);
531 skb->dst = NULL;
532 nf_reset(skb);
533 ipgre_ecn_decapsulate(iph, skb);
534 netif_rx(skb);
535 read_unlock(&ipgre_lock);
536 return(0);
537 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700538 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
540drop:
541 read_unlock(&ipgre_lock);
542drop_nolock:
543 kfree_skb(skb);
544 return(0);
545}
546
547static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
548{
Patrick McHardy2941a482006-01-08 22:05:26 -0800549 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700550 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700551 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 struct iphdr *tiph;
553 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800554 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 struct rtable *rt; /* Route to the other host */
556 struct net_device *tdev; /* Device to other host */
557 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700558 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800560 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 int mtu;
562
563 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700564 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 goto tx_error;
566 }
567
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700568 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 gre_hlen = 0;
570 tiph = (struct iphdr*)skb->data;
571 } else {
572 gre_hlen = tunnel->hlen;
573 tiph = &tunnel->parms.iph;
574 }
575
576 if ((dst = tiph->daddr) == 0) {
577 /* NBMA tunnel */
578
579 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700580 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 goto tx_error;
582 }
583
584 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800585 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 if ((dst = rt->rt_gateway) == 0)
587 goto tx_error_icmp;
588 }
589#ifdef CONFIG_IPV6
590 else if (skb->protocol == htons(ETH_P_IPV6)) {
591 struct in6_addr *addr6;
592 int addr_type;
593 struct neighbour *neigh = skb->dst->neighbour;
594
595 if (neigh == NULL)
596 goto tx_error;
597
598 addr6 = (struct in6_addr*)&neigh->primary_key;
599 addr_type = ipv6_addr_type(addr6);
600
601 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700602 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 addr_type = ipv6_addr_type(addr6);
604 }
605
606 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
607 goto tx_error_icmp;
608
609 dst = addr6->s6_addr32[3];
610 }
611#endif
612 else
613 goto tx_error;
614 }
615
616 tos = tiph->tos;
617 if (tos&1) {
618 if (skb->protocol == htons(ETH_P_IP))
619 tos = old_iph->tos;
620 tos &= ~1;
621 }
622
623 {
624 struct flowi fl = { .oif = tunnel->parms.link,
625 .nl_u = { .ip4_u =
626 { .daddr = dst,
627 .saddr = tiph->saddr,
628 .tos = RT_TOS(tos) } },
629 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700630 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700631 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 goto tx_error;
633 }
634 }
635 tdev = rt->u.dst.dev;
636
637 if (tdev == dev) {
638 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700639 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 goto tx_error;
641 }
642
643 df = tiph->frag_off;
644 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700645 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 else
647 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
648
649 if (skb->dst)
650 skb->dst->ops->update_pmtu(skb->dst, mtu);
651
652 if (skb->protocol == htons(ETH_P_IP)) {
653 df |= (old_iph->frag_off&htons(IP_DF));
654
655 if ((old_iph->frag_off&htons(IP_DF)) &&
656 mtu < ntohs(old_iph->tot_len)) {
657 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
658 ip_rt_put(rt);
659 goto tx_error;
660 }
661 }
662#ifdef CONFIG_IPV6
663 else if (skb->protocol == htons(ETH_P_IPV6)) {
664 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
665
666 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800667 if ((tunnel->parms.iph.daddr &&
668 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 rt6->rt6i_dst.plen == 128) {
670 rt6->rt6i_flags |= RTF_MODIFIED;
671 skb->dst->metrics[RTAX_MTU-1] = mtu;
672 }
673 }
674
675 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
676 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
677 ip_rt_put(rt);
678 goto tx_error;
679 }
680 }
681#endif
682
683 if (tunnel->err_count > 0) {
684 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
685 tunnel->err_count--;
686
687 dst_link_failure(skb);
688 } else
689 tunnel->err_count = 0;
690 }
691
692 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
693
Patrick McHardycfbba492007-07-09 15:33:40 -0700694 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
695 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
697 if (!new_skb) {
698 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900699 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 dev_kfree_skb(skb);
701 tunnel->recursion--;
702 return 0;
703 }
704 if (skb->sk)
705 skb_set_owner_w(new_skb, skb->sk);
706 dev_kfree_skb(skb);
707 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700708 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 }
710
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700711 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700712 skb_push(skb, gre_hlen);
713 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800715 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
716 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 dst_release(skb->dst);
718 skb->dst = &rt->u.dst;
719
720 /*
721 * Push down and install the IPIP header.
722 */
723
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700724 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 iph->version = 4;
726 iph->ihl = sizeof(struct iphdr) >> 2;
727 iph->frag_off = df;
728 iph->protocol = IPPROTO_GRE;
729 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
730 iph->daddr = rt->rt_dst;
731 iph->saddr = rt->rt_src;
732
733 if ((iph->ttl = tiph->ttl) == 0) {
734 if (skb->protocol == htons(ETH_P_IP))
735 iph->ttl = old_iph->ttl;
736#ifdef CONFIG_IPV6
737 else if (skb->protocol == htons(ETH_P_IPV6))
738 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
739#endif
740 else
741 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
742 }
743
Al Virod5a0a1e2006-11-08 00:23:14 -0800744 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
745 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746
747 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800748 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749
750 if (tunnel->parms.o_flags&GRE_SEQ) {
751 ++tunnel->o_seqno;
752 *ptr = htonl(tunnel->o_seqno);
753 ptr--;
754 }
755 if (tunnel->parms.o_flags&GRE_KEY) {
756 *ptr = tunnel->parms.o_key;
757 ptr--;
758 }
759 if (tunnel->parms.o_flags&GRE_CSUM) {
760 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800761 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 }
763 }
764
765 nf_reset(skb);
766
767 IPTUNNEL_XMIT();
768 tunnel->recursion--;
769 return 0;
770
771tx_error_icmp:
772 dst_link_failure(skb);
773
774tx_error:
775 stats->tx_errors++;
776 dev_kfree_skb(skb);
777 tunnel->recursion--;
778 return 0;
779}
780
Herbert Xu42aa9162008-10-09 11:59:32 -0700781static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800782{
783 struct net_device *tdev = NULL;
784 struct ip_tunnel *tunnel;
785 struct iphdr *iph;
786 int hlen = LL_MAX_HEADER;
787 int mtu = ETH_DATA_LEN;
788 int addend = sizeof(struct iphdr) + 4;
789
790 tunnel = netdev_priv(dev);
791 iph = &tunnel->parms.iph;
792
Herbert Xuc95b8192008-10-09 11:58:54 -0700793 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800794
795 if (iph->daddr) {
796 struct flowi fl = { .oif = tunnel->parms.link,
797 .nl_u = { .ip4_u =
798 { .daddr = iph->daddr,
799 .saddr = iph->saddr,
800 .tos = RT_TOS(iph->tos) } },
801 .proto = IPPROTO_GRE };
802 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700803 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800804 tdev = rt->u.dst.dev;
805 ip_rt_put(rt);
806 }
807 dev->flags |= IFF_POINTOPOINT;
808 }
809
810 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700811 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800812
813 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700814 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800815 mtu = tdev->mtu;
816 }
817 dev->iflink = tunnel->parms.link;
818
819 /* Precalculate GRE options length */
820 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
821 if (tunnel->parms.o_flags&GRE_CSUM)
822 addend += 4;
823 if (tunnel->parms.o_flags&GRE_KEY)
824 addend += 4;
825 if (tunnel->parms.o_flags&GRE_SEQ)
826 addend += 4;
827 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700828 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700829 mtu -= dev->hard_header_len - addend;
830
831 if (mtu < 68)
832 mtu = 68;
833
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800834 tunnel->hlen = addend;
835
Herbert Xu42aa9162008-10-09 11:59:32 -0700836 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800837}
838
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839static int
840ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
841{
842 int err = 0;
843 struct ip_tunnel_parm p;
844 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700845 struct net *net = dev_net(dev);
846 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847
848 switch (cmd) {
849 case SIOCGETTUNNEL:
850 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700851 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
853 err = -EFAULT;
854 break;
855 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700856 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 }
858 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800859 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860 memcpy(&p, &t->parms, sizeof(p));
861 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
862 err = -EFAULT;
863 break;
864
865 case SIOCADDTUNNEL:
866 case SIOCCHGTUNNEL:
867 err = -EPERM;
868 if (!capable(CAP_NET_ADMIN))
869 goto done;
870
871 err = -EFAULT;
872 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
873 goto done;
874
875 err = -EINVAL;
876 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
877 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
878 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
879 goto done;
880 if (p.iph.ttl)
881 p.iph.frag_off |= htons(IP_DF);
882
883 if (!(p.i_flags&GRE_KEY))
884 p.i_key = 0;
885 if (!(p.o_flags&GRE_KEY))
886 p.o_key = 0;
887
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700888 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700890 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891 if (t != NULL) {
892 if (t->dev != dev) {
893 err = -EEXIST;
894 break;
895 }
896 } else {
897 unsigned nflags=0;
898
Patrick McHardy2941a482006-01-08 22:05:26 -0800899 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900
Joe Perchesf97c1e02007-12-16 13:45:43 -0800901 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 nflags = IFF_BROADCAST;
903 else if (p.iph.daddr)
904 nflags = IFF_POINTOPOINT;
905
906 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
907 err = -EINVAL;
908 break;
909 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700910 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 t->parms.iph.saddr = p.iph.saddr;
912 t->parms.iph.daddr = p.iph.daddr;
913 t->parms.i_key = p.i_key;
914 t->parms.o_key = p.o_key;
915 memcpy(dev->dev_addr, &p.iph.saddr, 4);
916 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700917 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 netdev_state_change(dev);
919 }
920 }
921
922 if (t) {
923 err = 0;
924 if (cmd == SIOCCHGTUNNEL) {
925 t->parms.iph.ttl = p.iph.ttl;
926 t->parms.iph.tos = p.iph.tos;
927 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800928 if (t->parms.link != p.link) {
929 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -0700930 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800931 netdev_state_change(dev);
932 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 }
934 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
935 err = -EFAULT;
936 } else
937 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
938 break;
939
940 case SIOCDELTUNNEL:
941 err = -EPERM;
942 if (!capable(CAP_NET_ADMIN))
943 goto done;
944
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700945 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 err = -EFAULT;
947 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
948 goto done;
949 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700950 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 goto done;
952 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700953 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 goto done;
955 dev = t->dev;
956 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800957 unregister_netdevice(dev);
958 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 break;
960
961 default:
962 err = -EINVAL;
963 }
964
965done:
966 return err;
967}
968
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
970{
Patrick McHardy2941a482006-01-08 22:05:26 -0800971 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -0700972 if (new_mtu < 68 ||
973 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 return -EINVAL;
975 dev->mtu = new_mtu;
976 return 0;
977}
978
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900988
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
990 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
991
992 ping -t 255 224.66.66.66
993
994 If nobody answers, mbone does not work.
995
996 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
997 ip addr add 10.66.66.<somewhat>/24 dev Universe
998 ifconfig Universe up
999 ifconfig Universe add fe80::<Your_real_addr>/10
1000 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1001 ftp 10.66.66.66
1002 ...
1003 ftp fec0:6666:6666::193.233.7.65
1004 ...
1005
1006 */
1007
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001008static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1009 unsigned short type,
1010 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011{
Patrick McHardy2941a482006-01-08 22:05:26 -08001012 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001014 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015
1016 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1017 p[0] = t->parms.o_flags;
1018 p[1] = htons(type);
1019
1020 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001021 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001023
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 if (saddr)
1025 memcpy(&iph->saddr, saddr, 4);
1026
1027 if (daddr) {
1028 memcpy(&iph->daddr, daddr, 4);
1029 return t->hlen;
1030 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001031 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001033
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 return -t->hlen;
1035}
1036
Timo Teras6a5f44d2007-10-23 20:31:53 -07001037static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1038{
1039 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1040 memcpy(haddr, &iph->saddr, 4);
1041 return 4;
1042}
1043
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001044static const struct header_ops ipgre_header_ops = {
1045 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001046 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001047};
1048
Timo Teras6a5f44d2007-10-23 20:31:53 -07001049#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050static int ipgre_open(struct net_device *dev)
1051{
Patrick McHardy2941a482006-01-08 22:05:26 -08001052 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053
Joe Perchesf97c1e02007-12-16 13:45:43 -08001054 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 struct flowi fl = { .oif = t->parms.link,
1056 .nl_u = { .ip4_u =
1057 { .daddr = t->parms.iph.daddr,
1058 .saddr = t->parms.iph.saddr,
1059 .tos = RT_TOS(t->parms.iph.tos) } },
1060 .proto = IPPROTO_GRE };
1061 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001062 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 return -EADDRNOTAVAIL;
1064 dev = rt->u.dst.dev;
1065 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001066 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 return -EADDRNOTAVAIL;
1068 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001069 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 }
1071 return 0;
1072}
1073
1074static int ipgre_close(struct net_device *dev)
1075{
Patrick McHardy2941a482006-01-08 22:05:26 -08001076 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001077 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001078 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001079 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080 if (in_dev) {
1081 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1082 in_dev_put(in_dev);
1083 }
1084 }
1085 return 0;
1086}
1087
1088#endif
1089
1090static void ipgre_tunnel_setup(struct net_device *dev)
1091{
Herbert Xuc19e6542008-10-09 11:59:55 -07001092 dev->init = ipgre_tunnel_init;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 dev->uninit = ipgre_tunnel_uninit;
1094 dev->destructor = free_netdev;
1095 dev->hard_start_xmit = ipgre_tunnel_xmit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 dev->do_ioctl = ipgre_tunnel_ioctl;
1097 dev->change_mtu = ipgre_tunnel_change_mtu;
1098
1099 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001100 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001101 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 dev->flags = IFF_NOARP;
1103 dev->iflink = 0;
1104 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001105 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106}
1107
1108static int ipgre_tunnel_init(struct net_device *dev)
1109{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 struct ip_tunnel *tunnel;
1111 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
Patrick McHardy2941a482006-01-08 22:05:26 -08001113 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 iph = &tunnel->parms.iph;
1115
1116 tunnel->dev = dev;
1117 strcpy(tunnel->parms.name, dev->name);
1118
1119 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1120 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1121
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001124 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 if (!iph->saddr)
1126 return -EINVAL;
1127 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001128 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 dev->open = ipgre_open;
1130 dev->stop = ipgre_close;
1131 }
1132#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001133 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001134 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 return 0;
1137}
1138
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001139static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140{
Patrick McHardy2941a482006-01-08 22:05:26 -08001141 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001143 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144
1145 tunnel->dev = dev;
1146 strcpy(tunnel->parms.name, dev->name);
1147
1148 iph->version = 4;
1149 iph->protocol = IPPROTO_GRE;
1150 iph->ihl = 5;
1151 tunnel->hlen = sizeof(struct iphdr) + 4;
1152
1153 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001154 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 return 0;
1156}
1157
1158
1159static struct net_protocol ipgre_protocol = {
1160 .handler = ipgre_rcv,
1161 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001162 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163};
1164
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001165static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1166{
1167 int prio;
1168
1169 for (prio = 0; prio < 4; prio++) {
1170 int h;
1171 for (h = 0; h < HASH_SIZE; h++) {
1172 struct ip_tunnel *t;
1173 while ((t = ign->tunnels[prio][h]) != NULL)
1174 unregister_netdevice(t->dev);
1175 }
1176 }
1177}
1178
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001179static int ipgre_init_net(struct net *net)
1180{
1181 int err;
1182 struct ipgre_net *ign;
1183
1184 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001185 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001186 if (ign == NULL)
1187 goto err_alloc;
1188
1189 err = net_assign_generic(net, ipgre_net_id, ign);
1190 if (err < 0)
1191 goto err_assign;
1192
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001193 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1194 ipgre_tunnel_setup);
1195 if (!ign->fb_tunnel_dev) {
1196 err = -ENOMEM;
1197 goto err_alloc_dev;
1198 }
1199
1200 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1201 dev_net_set(ign->fb_tunnel_dev, net);
Herbert Xuc19e6542008-10-09 11:59:55 -07001202 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001203
1204 if ((err = register_netdev(ign->fb_tunnel_dev)))
1205 goto err_reg_dev;
1206
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001207 return 0;
1208
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001209err_reg_dev:
1210 free_netdev(ign->fb_tunnel_dev);
1211err_alloc_dev:
1212 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001213err_assign:
1214 kfree(ign);
1215err_alloc:
1216 return err;
1217}
1218
1219static void ipgre_exit_net(struct net *net)
1220{
1221 struct ipgre_net *ign;
1222
1223 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001224 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001225 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001226 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001227 kfree(ign);
1228}
1229
1230static struct pernet_operations ipgre_net_ops = {
1231 .init = ipgre_init_net,
1232 .exit = ipgre_exit_net,
1233};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Herbert Xuc19e6542008-10-09 11:59:55 -07001235static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1236{
1237 __be16 flags;
1238
1239 if (!data)
1240 return 0;
1241
1242 flags = 0;
1243 if (data[IFLA_GRE_IFLAGS])
1244 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1245 if (data[IFLA_GRE_OFLAGS])
1246 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1247 if (flags & (GRE_VERSION|GRE_ROUTING))
1248 return -EINVAL;
1249
1250 return 0;
1251}
1252
1253static void ipgre_netlink_parms(struct nlattr *data[],
1254 struct ip_tunnel_parm *parms)
1255{
1256 memset(parms, 0, sizeof(parms));
1257
1258 parms->iph.protocol = IPPROTO_GRE;
1259
1260 if (!data)
1261 return;
1262
1263 if (data[IFLA_GRE_LINK])
1264 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1265
1266 if (data[IFLA_GRE_IFLAGS])
1267 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1268
1269 if (data[IFLA_GRE_OFLAGS])
1270 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1271
1272 if (data[IFLA_GRE_IKEY])
1273 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1274
1275 if (data[IFLA_GRE_OKEY])
1276 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1277
1278 if (data[IFLA_GRE_LOCAL])
1279 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1280
1281 if (data[IFLA_GRE_REMOTE])
1282 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1283
1284 if (data[IFLA_GRE_TTL])
1285 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1286
1287 if (data[IFLA_GRE_TOS])
1288 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1289
1290 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1291 parms->iph.frag_off = htons(IP_DF);
1292}
1293
1294static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1295 struct nlattr *data[])
1296{
1297 struct ip_tunnel *nt;
1298 struct net *net = dev_net(dev);
1299 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1300 int mtu;
1301 int err;
1302
1303 nt = netdev_priv(dev);
1304 ipgre_netlink_parms(data, &nt->parms);
1305
1306 if (ipgre_tunnel_locate(net, &nt->parms, 0))
1307 return -EEXIST;
1308
1309 mtu = ipgre_tunnel_bind_dev(dev);
1310 if (!tb[IFLA_MTU])
1311 dev->mtu = mtu;
1312
1313 err = register_netdevice(dev);
1314 if (err)
1315 goto out;
1316
1317 dev_hold(dev);
1318 ipgre_tunnel_link(ign, nt);
1319
1320out:
1321 return err;
1322}
1323
1324static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1325 struct nlattr *data[])
1326{
1327 struct ip_tunnel *t, *nt;
1328 struct net *net = dev_net(dev);
1329 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1330 struct ip_tunnel_parm p;
1331 int mtu;
1332
1333 if (dev == ign->fb_tunnel_dev)
1334 return -EINVAL;
1335
1336 nt = netdev_priv(dev);
1337 ipgre_netlink_parms(data, &p);
1338
1339 t = ipgre_tunnel_locate(net, &p, 0);
1340
1341 if (t) {
1342 if (t->dev != dev)
1343 return -EEXIST;
1344 } else {
1345 unsigned nflags = 0;
1346
1347 t = nt;
1348
1349 if (ipv4_is_multicast(p.iph.daddr))
1350 nflags = IFF_BROADCAST;
1351 else if (p.iph.daddr)
1352 nflags = IFF_POINTOPOINT;
1353
1354 if ((dev->flags ^ nflags) &
1355 (IFF_POINTOPOINT | IFF_BROADCAST))
1356 return -EINVAL;
1357
1358 ipgre_tunnel_unlink(ign, t);
1359 t->parms.iph.saddr = p.iph.saddr;
1360 t->parms.iph.daddr = p.iph.daddr;
1361 t->parms.i_key = p.i_key;
1362 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1363 memcpy(dev->broadcast, &p.iph.daddr, 4);
1364 ipgre_tunnel_link(ign, t);
1365 netdev_state_change(dev);
1366 }
1367
1368 t->parms.o_key = p.o_key;
1369 t->parms.iph.ttl = p.iph.ttl;
1370 t->parms.iph.tos = p.iph.tos;
1371 t->parms.iph.frag_off = p.iph.frag_off;
1372
1373 if (t->parms.link != p.link) {
1374 t->parms.link = p.link;
1375 mtu = ipgre_tunnel_bind_dev(dev);
1376 if (!tb[IFLA_MTU])
1377 dev->mtu = mtu;
1378 netdev_state_change(dev);
1379 }
1380
1381 return 0;
1382}
1383
1384static size_t ipgre_get_size(const struct net_device *dev)
1385{
1386 return
1387 /* IFLA_GRE_LINK */
1388 nla_total_size(4) +
1389 /* IFLA_GRE_IFLAGS */
1390 nla_total_size(2) +
1391 /* IFLA_GRE_OFLAGS */
1392 nla_total_size(2) +
1393 /* IFLA_GRE_IKEY */
1394 nla_total_size(4) +
1395 /* IFLA_GRE_OKEY */
1396 nla_total_size(4) +
1397 /* IFLA_GRE_LOCAL */
1398 nla_total_size(4) +
1399 /* IFLA_GRE_REMOTE */
1400 nla_total_size(4) +
1401 /* IFLA_GRE_TTL */
1402 nla_total_size(1) +
1403 /* IFLA_GRE_TOS */
1404 nla_total_size(1) +
1405 /* IFLA_GRE_PMTUDISC */
1406 nla_total_size(1) +
1407 0;
1408}
1409
1410static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1411{
1412 struct ip_tunnel *t = netdev_priv(dev);
1413 struct ip_tunnel_parm *p = &t->parms;
1414
1415 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1416 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1417 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1418 NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1419 NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1420 NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1421 NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1422 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1423 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1424 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1425
1426 return 0;
1427
1428nla_put_failure:
1429 return -EMSGSIZE;
1430}
1431
1432static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1433 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1434 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1435 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1436 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1437 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1438 [IFLA_GRE_LOCAL] = { .len = 4 },
1439 [IFLA_GRE_REMOTE] = { .len = 4 },
1440 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1441 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1442 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1443};
1444
1445static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1446 .kind = "gre",
1447 .maxtype = IFLA_GRE_MAX,
1448 .policy = ipgre_policy,
1449 .priv_size = sizeof(struct ip_tunnel),
1450 .setup = ipgre_tunnel_setup,
1451 .validate = ipgre_tunnel_validate,
1452 .newlink = ipgre_newlink,
1453 .changelink = ipgre_changelink,
1454 .get_size = ipgre_get_size,
1455 .fill_info = ipgre_fill_info,
1456};
1457
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458/*
1459 * And now the modules code and kernel interface.
1460 */
1461
1462static int __init ipgre_init(void)
1463{
1464 int err;
1465
1466 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1467
1468 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1469 printk(KERN_INFO "ipgre init: can't add protocol\n");
1470 return -EAGAIN;
1471 }
1472
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001473 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1474 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001475 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001476
Herbert Xuc19e6542008-10-09 11:59:55 -07001477 err = rtnl_link_register(&ipgre_link_ops);
1478 if (err < 0)
1479 goto rtnl_link_failed;
1480
1481out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001483
1484rtnl_link_failed:
1485 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1486gen_device_failed:
1487 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1488 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489}
1490
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001491static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492{
Herbert Xuc19e6542008-10-09 11:59:55 -07001493 rtnl_link_unregister(&ipgre_link_ops);
1494 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1496 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497}
1498
1499module_init(ipgre_init);
1500module_exit(ipgre_fini);
1501MODULE_LICENSE("GPL");
Herbert Xuc19e6542008-10-09 11:59:55 -07001502MODULE_ALIAS("rtnl-link-gre");