blob: e77e3b8558340d9f60f2f42a6a9eb7bc8e8720bb [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: IP/IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, it is now a truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090038
Linus Torvalds1da177e2005-04-16 15:20:36 -070039 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090045
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090055
Linus Torvalds1da177e2005-04-16 15:20:36 -070056*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090078 find out how much more space you can allocate by calling
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89/*
90 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090095
Randy Dunlap4fc268d2006-01-11 12:17:47 -080096#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <linux/module.h>
98#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800110#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
118
/* Number of buckets in each address-keyed tunnel table (power of two). */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address (network byte order) into a bucket index in the
 * range [0, HASH_SIZE).  The argument is fully parenthesized so that any
 * expression may be passed, and the mask is derived from HASH_SIZE so the
 * two cannot drift apart.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback ("wildcard") tunnel device. */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel tables, one per combination of configured endpoints:
 *   tunnels_r_l - remote and local address set
 *   tunnels_r   - only remote address set
 *   tunnels_l   - only local address set
 *   tunnels_wc  - neither set (single slot)
 * tunnels[] indexes them by priority: bit 1 = remote set, bit 0 = local set.
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Taken for writing when a table slot is modified, for reading on lookup. */
static DEFINE_RWLOCK(ipip_lock);
135
Al Virod5a0a1e2006-11-08 00:23:14 -0800136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161{
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
Al Virod5a0a1e2006-11-08 00:23:14 -0800209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 char name[IFNAMSIZ];
214
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800224 else
225 sprintf(name, "tunl%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228 if (dev == NULL)
229 return NULL;
230
Patrick McHardy2941a482006-01-08 22:05:26 -0800231 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 dev->init = ipip_tunnel_init;
233 nt->parms = *parms;
234
235 if (register_netdevice(dev) < 0) {
236 free_netdev(dev);
237 goto failed;
238 }
239
240 dev_hold(dev);
241 ipip_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 return nt;
243
244failed:
245 return NULL;
246}
247
248static void ipip_tunnel_uninit(struct net_device *dev)
249{
250 if (dev == ipip_fb_tunnel_dev) {
251 write_lock_bh(&ipip_lock);
252 tunnels_wc[0] = NULL;
253 write_unlock_bh(&ipip_lock);
254 } else
Patrick McHardy2941a482006-01-08 22:05:26 -0800255 ipip_tunnel_unlink(netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 dev_put(dev);
257}
258
Herbert Xud2acc342006-03-28 01:12:13 -0800259static int ipip_err(struct sk_buff *skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260{
261#ifndef I_WISH_WORLD_WERE_PERFECT
262
263/* It is not :-( All the routers (except for Linux) return only
264 8 bytes of packet payload. It means, that precise relaying of
265 ICMP in the real Internet is absolutely infeasible.
266 */
267 struct iphdr *iph = (struct iphdr*)skb->data;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300268 const int type = icmp_hdr(skb)->type;
269 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270 struct ip_tunnel *t;
Herbert Xud2acc342006-03-28 01:12:13 -0800271 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 switch (type) {
274 default:
275 case ICMP_PARAMETERPROB:
Herbert Xud2acc342006-03-28 01:12:13 -0800276 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277
278 case ICMP_DEST_UNREACH:
279 switch (code) {
280 case ICMP_SR_FAILED:
281 case ICMP_PORT_UNREACH:
282 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800283 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 case ICMP_FRAG_NEEDED:
285 /* Soft state for pmtu is maintained by IP core. */
Herbert Xud2acc342006-03-28 01:12:13 -0800286 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 default:
288 /* All others are translated to HOST_UNREACH.
289 rfc2003 contains "deep thoughts" about NET_UNREACH,
290 I believe they are just ether pollution. --ANK
291 */
292 break;
293 }
294 break;
295 case ICMP_TIME_EXCEEDED:
296 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800297 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 break;
299 }
300
Herbert Xud2acc342006-03-28 01:12:13 -0800301 err = -ENOENT;
302
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 read_lock(&ipip_lock);
304 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
305 if (t == NULL || t->parms.iph.daddr == 0)
306 goto out;
Herbert Xud2acc342006-03-28 01:12:13 -0800307
308 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
310 goto out;
311
312 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
313 t->err_count++;
314 else
315 t->err_count = 1;
316 t->err_time = jiffies;
317out:
318 read_unlock(&ipip_lock);
Herbert Xud2acc342006-03-28 01:12:13 -0800319 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320#else
321 struct iphdr *iph = (struct iphdr*)dp;
322 int hlen = iph->ihl<<2;
323 struct iphdr *eiph;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300324 const int type = icmp_hdr(skb)->type;
325 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 int rel_type = 0;
327 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700328 __be32 rel_info = 0;
329 __u32 n = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 struct sk_buff *skb2;
331 struct flowi fl;
332 struct rtable *rt;
333
334 if (len < hlen + sizeof(struct iphdr))
Herbert Xud2acc342006-03-28 01:12:13 -0800335 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 eiph = (struct iphdr*)(dp + hlen);
337
338 switch (type) {
339 default:
Herbert Xud2acc342006-03-28 01:12:13 -0800340 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300342 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700343 if (n < hlen)
Herbert Xud2acc342006-03-28 01:12:13 -0800344 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345
346 /* So... This guy found something strange INSIDE encapsulated
347 packet. Well, he is fool, but what can we do ?
348 */
349 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700350 rel_info = htonl((n - hlen) << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 break;
352
353 case ICMP_DEST_UNREACH:
354 switch (code) {
355 case ICMP_SR_FAILED:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800358 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 case ICMP_FRAG_NEEDED:
360 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300361 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700362 if (n < hlen+68)
Herbert Xud2acc342006-03-28 01:12:13 -0800363 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700364 n -= hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700366 if (n > ntohs(eiph->tot_len))
Herbert Xud2acc342006-03-28 01:12:13 -0800367 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700368 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 break;
370 default:
371 /* All others are translated to HOST_UNREACH.
372 rfc2003 contains "deep thoughts" about NET_UNREACH,
373 I believe, it is just ether pollution. --ANK
374 */
375 rel_type = ICMP_DEST_UNREACH;
376 rel_code = ICMP_HOST_UNREACH;
377 break;
378 }
379 break;
380 case ICMP_TIME_EXCEEDED:
381 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800382 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 break;
384 }
385
386 /* Prepare fake skb to feed it to icmp_send */
387 skb2 = skb_clone(skb, GFP_ATOMIC);
388 if (skb2 == NULL)
Herbert Xud2acc342006-03-28 01:12:13 -0800389 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 dst_release(skb2->dst);
391 skb2->dst = NULL;
392 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700393 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394
395 /* Try to guess incoming interface */
396 memset(&fl, 0, sizeof(fl));
397 fl.fl4_daddr = eiph->saddr;
398 fl.fl4_tos = RT_TOS(eiph->tos);
399 fl.proto = IPPROTO_IPIP;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800400 if (ip_route_output_key(&init_net, &rt, &key)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800402 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 }
404 skb2->dev = rt->u.dst.dev;
405
406 /* route "incoming" packet */
407 if (rt->rt_flags&RTCF_LOCAL) {
408 ip_rt_put(rt);
409 rt = NULL;
410 fl.fl4_daddr = eiph->daddr;
411 fl.fl4_src = eiph->saddr;
412 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800413 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
415 ip_rt_put(rt);
416 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800417 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 }
419 } else {
420 ip_rt_put(rt);
421 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
422 skb2->dst->dev->type != ARPHRD_TUNNEL) {
423 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800424 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 }
426 }
427
428 /* change mtu on this route */
429 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700430 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800432 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
Al Viroc55e2f42006-09-19 13:23:19 -0700434 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800436 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 if (t->parms.iph.ttl) {
438 rel_type = ICMP_DEST_UNREACH;
439 rel_code = ICMP_HOST_UNREACH;
440 }
441 }
442
443 icmp_send(skb2, rel_type, rel_code, rel_info);
444 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800445 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446#endif
447}
448
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700449static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
450 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700452 struct iphdr *inner_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
454 if (INET_ECN_is_ce(outer_iph->tos))
455 IP_ECN_set_ce(inner_iph);
456}
457
458static int ipip_rcv(struct sk_buff *skb)
459{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 struct ip_tunnel *tunnel;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700461 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
463 read_lock(&ipip_lock);
464 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
465 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
466 read_unlock(&ipip_lock);
467 kfree_skb(skb);
468 return 0;
469 }
470
471 secpath_reset(skb);
472
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700473 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700474 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 skb->protocol = htons(ETH_P_IP);
476 skb->pkt_type = PACKET_HOST;
477
478 tunnel->stat.rx_packets++;
479 tunnel->stat.rx_bytes += skb->len;
480 skb->dev = tunnel->dev;
481 dst_release(skb->dst);
482 skb->dst = NULL;
483 nf_reset(skb);
484 ipip_ecn_decapsulate(iph, skb);
485 netif_rx(skb);
486 read_unlock(&ipip_lock);
487 return 0;
488 }
489 read_unlock(&ipip_lock);
490
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 return -1;
492}
493
494/*
495 * This function assumes it is being called from dev_queue_xmit()
496 * and that skb is filled properly by that function.
497 */
498
499static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
500{
Patrick McHardy2941a482006-01-08 22:05:26 -0800501 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 struct net_device_stats *stats = &tunnel->stat;
503 struct iphdr *tiph = &tunnel->parms.iph;
504 u8 tos = tunnel->parms.iph.tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800505 __be16 df = tiph->frag_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 struct rtable *rt; /* Route to the other host */
507 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700508 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700510 unsigned int max_headroom; /* The extra header space needed */
Al Virod5a0a1e2006-11-08 00:23:14 -0800511 __be32 dst = tiph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 int mtu;
513
514 if (tunnel->recursion++) {
515 tunnel->stat.collisions++;
516 goto tx_error;
517 }
518
519 if (skb->protocol != htons(ETH_P_IP))
520 goto tx_error;
521
522 if (tos&1)
523 tos = old_iph->tos;
524
525 if (!dst) {
526 /* NBMA tunnel */
527 if ((rt = (struct rtable*)skb->dst) == NULL) {
528 tunnel->stat.tx_fifo_errors++;
529 goto tx_error;
530 }
531 if ((dst = rt->rt_gateway) == 0)
532 goto tx_error_icmp;
533 }
534
535 {
536 struct flowi fl = { .oif = tunnel->parms.link,
537 .nl_u = { .ip4_u =
538 { .daddr = dst,
539 .saddr = tiph->saddr,
540 .tos = RT_TOS(tos) } },
541 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800542 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 tunnel->stat.tx_carrier_errors++;
544 goto tx_error_icmp;
545 }
546 }
547 tdev = rt->u.dst.dev;
548
549 if (tdev == dev) {
550 ip_rt_put(rt);
551 tunnel->stat.collisions++;
552 goto tx_error;
553 }
554
555 if (tiph->frag_off)
556 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
557 else
558 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
559
560 if (mtu < 68) {
561 tunnel->stat.collisions++;
562 ip_rt_put(rt);
563 goto tx_error;
564 }
565 if (skb->dst)
566 skb->dst->ops->update_pmtu(skb->dst, mtu);
567
568 df |= (old_iph->frag_off&htons(IP_DF));
569
570 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
571 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
572 ip_rt_put(rt);
573 goto tx_error;
574 }
575
576 if (tunnel->err_count > 0) {
577 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
578 tunnel->err_count--;
579 dst_link_failure(skb);
580 } else
581 tunnel->err_count = 0;
582 }
583
584 /*
585 * Okay, now see if we can stuff it in the buffer as-is.
586 */
587 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
588
Patrick McHardycfbba492007-07-09 15:33:40 -0700589 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
590 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
592 if (!new_skb) {
593 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900594 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 dev_kfree_skb(skb);
596 tunnel->recursion--;
597 return 0;
598 }
599 if (skb->sk)
600 skb_set_owner_w(new_skb, skb->sk);
601 dev_kfree_skb(skb);
602 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700603 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 }
605
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700606 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700607 skb_push(skb, sizeof(struct iphdr));
608 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800610 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
611 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 dst_release(skb->dst);
613 skb->dst = &rt->u.dst;
614
615 /*
616 * Push down and install the IPIP header.
617 */
618
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700619 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 iph->version = 4;
621 iph->ihl = sizeof(struct iphdr)>>2;
622 iph->frag_off = df;
623 iph->protocol = IPPROTO_IPIP;
624 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
625 iph->daddr = rt->rt_dst;
626 iph->saddr = rt->rt_src;
627
628 if ((iph->ttl = tiph->ttl) == 0)
629 iph->ttl = old_iph->ttl;
630
631 nf_reset(skb);
632
633 IPTUNNEL_XMIT();
634 tunnel->recursion--;
635 return 0;
636
637tx_error_icmp:
638 dst_link_failure(skb);
639tx_error:
640 stats->tx_errors++;
641 dev_kfree_skb(skb);
642 tunnel->recursion--;
643 return 0;
644}
645
Michal Schmidt55339952007-12-12 11:01:43 -0800646static void ipip_tunnel_bind_dev(struct net_device *dev)
647{
648 struct net_device *tdev = NULL;
649 struct ip_tunnel *tunnel;
650 struct iphdr *iph;
651
652 tunnel = netdev_priv(dev);
653 iph = &tunnel->parms.iph;
654
655 if (iph->daddr) {
656 struct flowi fl = { .oif = tunnel->parms.link,
657 .nl_u = { .ip4_u =
658 { .daddr = iph->daddr,
659 .saddr = iph->saddr,
660 .tos = RT_TOS(iph->tos) } },
661 .proto = IPPROTO_IPIP };
662 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800663 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidt55339952007-12-12 11:01:43 -0800664 tdev = rt->u.dst.dev;
665 ip_rt_put(rt);
666 }
667 dev->flags |= IFF_POINTOPOINT;
668 }
669
670 if (!tdev && tunnel->parms.link)
671 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
672
673 if (tdev) {
674 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
675 dev->mtu = tdev->mtu - sizeof(struct iphdr);
676 }
677 dev->iflink = tunnel->parms.link;
678}
679
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680static int
681ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
682{
683 int err = 0;
684 struct ip_tunnel_parm p;
685 struct ip_tunnel *t;
686
687 switch (cmd) {
688 case SIOCGETTUNNEL:
689 t = NULL;
690 if (dev == ipip_fb_tunnel_dev) {
691 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
692 err = -EFAULT;
693 break;
694 }
695 t = ipip_tunnel_locate(&p, 0);
696 }
697 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800698 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 memcpy(&p, &t->parms, sizeof(p));
700 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
701 err = -EFAULT;
702 break;
703
704 case SIOCADDTUNNEL:
705 case SIOCCHGTUNNEL:
706 err = -EPERM;
707 if (!capable(CAP_NET_ADMIN))
708 goto done;
709
710 err = -EFAULT;
711 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
712 goto done;
713
714 err = -EINVAL;
715 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
716 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
717 goto done;
718 if (p.iph.ttl)
719 p.iph.frag_off |= htons(IP_DF);
720
721 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
722
723 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
724 if (t != NULL) {
725 if (t->dev != dev) {
726 err = -EEXIST;
727 break;
728 }
729 } else {
730 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
731 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
732 err = -EINVAL;
733 break;
734 }
Patrick McHardy2941a482006-01-08 22:05:26 -0800735 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 ipip_tunnel_unlink(t);
737 t->parms.iph.saddr = p.iph.saddr;
738 t->parms.iph.daddr = p.iph.daddr;
739 memcpy(dev->dev_addr, &p.iph.saddr, 4);
740 memcpy(dev->broadcast, &p.iph.daddr, 4);
741 ipip_tunnel_link(t);
742 netdev_state_change(dev);
743 }
744 }
745
746 if (t) {
747 err = 0;
748 if (cmd == SIOCCHGTUNNEL) {
749 t->parms.iph.ttl = p.iph.ttl;
750 t->parms.iph.tos = p.iph.tos;
751 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidt55339952007-12-12 11:01:43 -0800752 if (t->parms.link != p.link) {
753 t->parms.link = p.link;
754 ipip_tunnel_bind_dev(dev);
755 netdev_state_change(dev);
756 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 }
758 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
759 err = -EFAULT;
760 } else
761 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
762 break;
763
764 case SIOCDELTUNNEL:
765 err = -EPERM;
766 if (!capable(CAP_NET_ADMIN))
767 goto done;
768
769 if (dev == ipip_fb_tunnel_dev) {
770 err = -EFAULT;
771 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
772 goto done;
773 err = -ENOENT;
774 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
775 goto done;
776 err = -EPERM;
777 if (t->dev == ipip_fb_tunnel_dev)
778 goto done;
779 dev = t->dev;
780 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800781 unregister_netdevice(dev);
782 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 break;
784
785 default:
786 err = -EINVAL;
787 }
788
789done:
790 return err;
791}
792
793static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
794{
Patrick McHardy2941a482006-01-08 22:05:26 -0800795 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796}
797
798static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
799{
800 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
801 return -EINVAL;
802 dev->mtu = new_mtu;
803 return 0;
804}
805
806static void ipip_tunnel_setup(struct net_device *dev)
807{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 dev->uninit = ipip_tunnel_uninit;
809 dev->hard_start_xmit = ipip_tunnel_xmit;
810 dev->get_stats = ipip_tunnel_get_stats;
811 dev->do_ioctl = ipip_tunnel_ioctl;
812 dev->change_mtu = ipip_tunnel_change_mtu;
813 dev->destructor = free_netdev;
814
815 dev->type = ARPHRD_TUNNEL;
816 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800817 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 dev->flags = IFF_NOARP;
819 dev->iflink = 0;
820 dev->addr_len = 4;
821}
822
823static int ipip_tunnel_init(struct net_device *dev)
824{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 struct ip_tunnel *tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826
Patrick McHardy2941a482006-01-08 22:05:26 -0800827 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
829 tunnel->dev = dev;
830 strcpy(tunnel->parms.name, dev->name);
831
832 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
833 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
834
Michal Schmidt55339952007-12-12 11:01:43 -0800835 ipip_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836
837 return 0;
838}
839
840static int __init ipip_fb_tunnel_init(struct net_device *dev)
841{
Patrick McHardy2941a482006-01-08 22:05:26 -0800842 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 struct iphdr *iph = &tunnel->parms.iph;
844
845 tunnel->dev = dev;
846 strcpy(tunnel->parms.name, dev->name);
847
848 iph->version = 4;
849 iph->protocol = IPPROTO_IPIP;
850 iph->ihl = 5;
851
852 dev_hold(dev);
853 tunnels_wc[0] = tunnel;
854 return 0;
855}
856
857static struct xfrm_tunnel ipip_handler = {
858 .handler = ipip_rcv,
859 .err_handler = ipip_err,
Herbert Xud2acc342006-03-28 01:12:13 -0800860 .priority = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861};
862
863static char banner[] __initdata =
864 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865
866static int __init ipip_init(void)
867{
868 int err;
869
870 printk(banner);
871
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800872 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 printk(KERN_INFO "ipip init: can't register tunnel\n");
874 return -EAGAIN;
875 }
876
877 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
878 "tunl0",
879 ipip_tunnel_setup);
880 if (!ipip_fb_tunnel_dev) {
881 err = -ENOMEM;
882 goto err1;
883 }
884
885 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
886
887 if ((err = register_netdev(ipip_fb_tunnel_dev)))
888 goto err2;
889 out:
890 return err;
891 err2:
892 free_netdev(ipip_fb_tunnel_dev);
893 err1:
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800894 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 goto out;
896}
897
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700898static void __exit ipip_destroy_tunnels(void)
899{
900 int prio;
901
902 for (prio = 1; prio < 4; prio++) {
903 int h;
904 for (h = 0; h < HASH_SIZE; h++) {
905 struct ip_tunnel *t;
906 while ((t = tunnels[prio][h]) != NULL)
907 unregister_netdevice(t->dev);
908 }
909 }
910}
911
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912static void __exit ipip_fini(void)
913{
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800914 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
916
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700917 rtnl_lock();
918 ipip_destroy_tunnels();
919 unregister_netdevice(ipip_fb_tunnel_dev);
920 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921}
922
923module_init(ipip_init);
924module_exit(ipip_fini);
925MODULE_LICENSE("GPL");