/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name not tunnel: when error reporting.
		Added tx_dropped stat

		-Alan Cox	(Alan.Cox@linux.org) 21 March 95

	Reworked:
		Changed to tunnel to destination gateway in addition to the
			tunnel's pointopoint address
		Almost completely rewritten
		Note:  There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/

/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

/* Number of buckets in each address-keyed tunnel table. */
#define HASH_SIZE  16
/* Fold an IPv4 address into a 4-bit bucket index (0..15). */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback device ("tunl0"), allocated in ipip_init(). */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel tables, one per combination of configured endpoints:
 *   tunnels_r_l - both remote and local address set
 *   tunnels_r   - remote only
 *   tunnels_l   - local only
 *   tunnels_wc  - neither (single wildcard slot)
 * tunnels[] is indexed by the "prio" value built in __ipip_bucket()
 * (bit 1 = remote set, bit 0 = local set).
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Guards updates to, and packet-path lookups in, the tables above. */
static DEFINE_RWLOCK(ipip_lock);

Al Virod5a0a1e2006-11-08 00:23:14 -0800136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161{
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
Al Virod5a0a1e2006-11-08 00:23:14 -0800209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 char name[IFNAMSIZ];
214
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800224 else
225 sprintf(name, "tunl%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228 if (dev == NULL)
229 return NULL;
230
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800231 if (strchr(name, '%')) {
232 if (dev_alloc_name(dev, name) < 0)
233 goto failed_free;
234 }
235
Patrick McHardy2941a482006-01-08 22:05:26 -0800236 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 dev->init = ipip_tunnel_init;
238 nt->parms = *parms;
239
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800240 if (register_netdevice(dev) < 0)
241 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242
243 dev_hold(dev);
244 ipip_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 return nt;
246
Pavel Emelyanovb37d4282008-02-26 23:51:04 -0800247failed_free:
248 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 return NULL;
250}
251
252static void ipip_tunnel_uninit(struct net_device *dev)
253{
254 if (dev == ipip_fb_tunnel_dev) {
255 write_lock_bh(&ipip_lock);
256 tunnels_wc[0] = NULL;
257 write_unlock_bh(&ipip_lock);
258 } else
Patrick McHardy2941a482006-01-08 22:05:26 -0800259 ipip_tunnel_unlink(netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 dev_put(dev);
261}
262
Herbert Xud2acc342006-03-28 01:12:13 -0800263static int ipip_err(struct sk_buff *skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264{
265#ifndef I_WISH_WORLD_WERE_PERFECT
266
267/* It is not :-( All the routers (except for Linux) return only
268 8 bytes of packet payload. It means, that precise relaying of
269 ICMP in the real Internet is absolutely infeasible.
270 */
271 struct iphdr *iph = (struct iphdr*)skb->data;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300272 const int type = icmp_hdr(skb)->type;
273 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 struct ip_tunnel *t;
Herbert Xud2acc342006-03-28 01:12:13 -0800275 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276
277 switch (type) {
278 default:
279 case ICMP_PARAMETERPROB:
Herbert Xud2acc342006-03-28 01:12:13 -0800280 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281
282 case ICMP_DEST_UNREACH:
283 switch (code) {
284 case ICMP_SR_FAILED:
285 case ICMP_PORT_UNREACH:
286 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800287 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 case ICMP_FRAG_NEEDED:
289 /* Soft state for pmtu is maintained by IP core. */
Herbert Xud2acc342006-03-28 01:12:13 -0800290 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 default:
292 /* All others are translated to HOST_UNREACH.
293 rfc2003 contains "deep thoughts" about NET_UNREACH,
294 I believe they are just ether pollution. --ANK
295 */
296 break;
297 }
298 break;
299 case ICMP_TIME_EXCEEDED:
300 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800301 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 break;
303 }
304
Herbert Xud2acc342006-03-28 01:12:13 -0800305 err = -ENOENT;
306
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 read_lock(&ipip_lock);
308 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
309 if (t == NULL || t->parms.iph.daddr == 0)
310 goto out;
Herbert Xud2acc342006-03-28 01:12:13 -0800311
312 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
314 goto out;
315
316 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
317 t->err_count++;
318 else
319 t->err_count = 1;
320 t->err_time = jiffies;
321out:
322 read_unlock(&ipip_lock);
Herbert Xud2acc342006-03-28 01:12:13 -0800323 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324#else
325 struct iphdr *iph = (struct iphdr*)dp;
326 int hlen = iph->ihl<<2;
327 struct iphdr *eiph;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300328 const int type = icmp_hdr(skb)->type;
329 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 int rel_type = 0;
331 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700332 __be32 rel_info = 0;
333 __u32 n = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 struct sk_buff *skb2;
335 struct flowi fl;
336 struct rtable *rt;
337
338 if (len < hlen + sizeof(struct iphdr))
Herbert Xud2acc342006-03-28 01:12:13 -0800339 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 eiph = (struct iphdr*)(dp + hlen);
341
342 switch (type) {
343 default:
Herbert Xud2acc342006-03-28 01:12:13 -0800344 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300346 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700347 if (n < hlen)
Herbert Xud2acc342006-03-28 01:12:13 -0800348 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
350 /* So... This guy found something strange INSIDE encapsulated
351 packet. Well, he is fool, but what can we do ?
352 */
353 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700354 rel_info = htonl((n - hlen) << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 break;
356
357 case ICMP_DEST_UNREACH:
358 switch (code) {
359 case ICMP_SR_FAILED:
360 case ICMP_PORT_UNREACH:
361 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800362 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 case ICMP_FRAG_NEEDED:
364 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300365 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700366 if (n < hlen+68)
Herbert Xud2acc342006-03-28 01:12:13 -0800367 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700368 n -= hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700370 if (n > ntohs(eiph->tot_len))
Herbert Xud2acc342006-03-28 01:12:13 -0800371 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700372 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 break;
374 default:
375 /* All others are translated to HOST_UNREACH.
376 rfc2003 contains "deep thoughts" about NET_UNREACH,
377 I believe, it is just ether pollution. --ANK
378 */
379 rel_type = ICMP_DEST_UNREACH;
380 rel_code = ICMP_HOST_UNREACH;
381 break;
382 }
383 break;
384 case ICMP_TIME_EXCEEDED:
385 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800386 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 break;
388 }
389
390 /* Prepare fake skb to feed it to icmp_send */
391 skb2 = skb_clone(skb, GFP_ATOMIC);
392 if (skb2 == NULL)
Herbert Xud2acc342006-03-28 01:12:13 -0800393 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 dst_release(skb2->dst);
395 skb2->dst = NULL;
396 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700397 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
399 /* Try to guess incoming interface */
400 memset(&fl, 0, sizeof(fl));
401 fl.fl4_daddr = eiph->saddr;
402 fl.fl4_tos = RT_TOS(eiph->tos);
403 fl.proto = IPPROTO_IPIP;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800404 if (ip_route_output_key(&init_net, &rt, &key)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800406 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 }
408 skb2->dev = rt->u.dst.dev;
409
410 /* route "incoming" packet */
411 if (rt->rt_flags&RTCF_LOCAL) {
412 ip_rt_put(rt);
413 rt = NULL;
414 fl.fl4_daddr = eiph->daddr;
415 fl.fl4_src = eiph->saddr;
416 fl.fl4_tos = eiph->tos;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800417 if (ip_route_output_key(&init_net, &rt, &fl) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
419 ip_rt_put(rt);
420 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800421 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 }
423 } else {
424 ip_rt_put(rt);
425 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
426 skb2->dst->dev->type != ARPHRD_TUNNEL) {
427 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800428 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 }
430 }
431
432 /* change mtu on this route */
433 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700434 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800436 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 }
Al Viroc55e2f42006-09-19 13:23:19 -0700438 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800440 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 if (t->parms.iph.ttl) {
442 rel_type = ICMP_DEST_UNREACH;
443 rel_code = ICMP_HOST_UNREACH;
444 }
445 }
446
447 icmp_send(skb2, rel_type, rel_code, rel_info);
448 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800449 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450#endif
451}
452
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700453static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
454 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700456 struct iphdr *inner_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457
458 if (INET_ECN_is_ce(outer_iph->tos))
459 IP_ECN_set_ce(inner_iph);
460}
461
462static int ipip_rcv(struct sk_buff *skb)
463{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 struct ip_tunnel *tunnel;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700465 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
467 read_lock(&ipip_lock);
468 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
469 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
470 read_unlock(&ipip_lock);
471 kfree_skb(skb);
472 return 0;
473 }
474
475 secpath_reset(skb);
476
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700477 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700478 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 skb->protocol = htons(ETH_P_IP);
480 skb->pkt_type = PACKET_HOST;
481
482 tunnel->stat.rx_packets++;
483 tunnel->stat.rx_bytes += skb->len;
484 skb->dev = tunnel->dev;
485 dst_release(skb->dst);
486 skb->dst = NULL;
487 nf_reset(skb);
488 ipip_ecn_decapsulate(iph, skb);
489 netif_rx(skb);
490 read_unlock(&ipip_lock);
491 return 0;
492 }
493 read_unlock(&ipip_lock);
494
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 return -1;
496}
497
498/*
499 * This function assumes it is being called from dev_queue_xmit()
500 * and that skb is filled properly by that function.
501 */
502
503static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
504{
Patrick McHardy2941a482006-01-08 22:05:26 -0800505 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 struct net_device_stats *stats = &tunnel->stat;
507 struct iphdr *tiph = &tunnel->parms.iph;
508 u8 tos = tunnel->parms.iph.tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800509 __be16 df = tiph->frag_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 struct rtable *rt; /* Route to the other host */
511 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700512 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700514 unsigned int max_headroom; /* The extra header space needed */
Al Virod5a0a1e2006-11-08 00:23:14 -0800515 __be32 dst = tiph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 int mtu;
517
518 if (tunnel->recursion++) {
519 tunnel->stat.collisions++;
520 goto tx_error;
521 }
522
523 if (skb->protocol != htons(ETH_P_IP))
524 goto tx_error;
525
526 if (tos&1)
527 tos = old_iph->tos;
528
529 if (!dst) {
530 /* NBMA tunnel */
531 if ((rt = (struct rtable*)skb->dst) == NULL) {
532 tunnel->stat.tx_fifo_errors++;
533 goto tx_error;
534 }
535 if ((dst = rt->rt_gateway) == 0)
536 goto tx_error_icmp;
537 }
538
539 {
540 struct flowi fl = { .oif = tunnel->parms.link,
541 .nl_u = { .ip4_u =
542 { .daddr = dst,
543 .saddr = tiph->saddr,
544 .tos = RT_TOS(tos) } },
545 .proto = IPPROTO_IPIP };
Denis V. Lunevf2063512008-01-22 22:07:34 -0800546 if (ip_route_output_key(&init_net, &rt, &fl)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 tunnel->stat.tx_carrier_errors++;
548 goto tx_error_icmp;
549 }
550 }
551 tdev = rt->u.dst.dev;
552
553 if (tdev == dev) {
554 ip_rt_put(rt);
555 tunnel->stat.collisions++;
556 goto tx_error;
557 }
558
559 if (tiph->frag_off)
560 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
561 else
562 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
563
564 if (mtu < 68) {
565 tunnel->stat.collisions++;
566 ip_rt_put(rt);
567 goto tx_error;
568 }
569 if (skb->dst)
570 skb->dst->ops->update_pmtu(skb->dst, mtu);
571
572 df |= (old_iph->frag_off&htons(IP_DF));
573
574 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
575 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
576 ip_rt_put(rt);
577 goto tx_error;
578 }
579
580 if (tunnel->err_count > 0) {
581 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
582 tunnel->err_count--;
583 dst_link_failure(skb);
584 } else
585 tunnel->err_count = 0;
586 }
587
588 /*
589 * Okay, now see if we can stuff it in the buffer as-is.
590 */
591 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
592
Patrick McHardycfbba492007-07-09 15:33:40 -0700593 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
594 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
596 if (!new_skb) {
597 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900598 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 dev_kfree_skb(skb);
600 tunnel->recursion--;
601 return 0;
602 }
603 if (skb->sk)
604 skb_set_owner_w(new_skb, skb->sk);
605 dev_kfree_skb(skb);
606 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700607 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 }
609
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700610 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700611 skb_push(skb, sizeof(struct iphdr));
612 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800614 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
615 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 dst_release(skb->dst);
617 skb->dst = &rt->u.dst;
618
619 /*
620 * Push down and install the IPIP header.
621 */
622
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700623 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 iph->version = 4;
625 iph->ihl = sizeof(struct iphdr)>>2;
626 iph->frag_off = df;
627 iph->protocol = IPPROTO_IPIP;
628 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
629 iph->daddr = rt->rt_dst;
630 iph->saddr = rt->rt_src;
631
632 if ((iph->ttl = tiph->ttl) == 0)
633 iph->ttl = old_iph->ttl;
634
635 nf_reset(skb);
636
637 IPTUNNEL_XMIT();
638 tunnel->recursion--;
639 return 0;
640
641tx_error_icmp:
642 dst_link_failure(skb);
643tx_error:
644 stats->tx_errors++;
645 dev_kfree_skb(skb);
646 tunnel->recursion--;
647 return 0;
648}
649
Michal Schmidt55339952007-12-12 11:01:43 -0800650static void ipip_tunnel_bind_dev(struct net_device *dev)
651{
652 struct net_device *tdev = NULL;
653 struct ip_tunnel *tunnel;
654 struct iphdr *iph;
655
656 tunnel = netdev_priv(dev);
657 iph = &tunnel->parms.iph;
658
659 if (iph->daddr) {
660 struct flowi fl = { .oif = tunnel->parms.link,
661 .nl_u = { .ip4_u =
662 { .daddr = iph->daddr,
663 .saddr = iph->saddr,
664 .tos = RT_TOS(iph->tos) } },
665 .proto = IPPROTO_IPIP };
666 struct rtable *rt;
Denis V. Lunevf2063512008-01-22 22:07:34 -0800667 if (!ip_route_output_key(&init_net, &rt, &fl)) {
Michal Schmidt55339952007-12-12 11:01:43 -0800668 tdev = rt->u.dst.dev;
669 ip_rt_put(rt);
670 }
671 dev->flags |= IFF_POINTOPOINT;
672 }
673
674 if (!tdev && tunnel->parms.link)
675 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
676
677 if (tdev) {
678 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
679 dev->mtu = tdev->mtu - sizeof(struct iphdr);
680 }
681 dev->iflink = tunnel->parms.link;
682}
683
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684static int
685ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
686{
687 int err = 0;
688 struct ip_tunnel_parm p;
689 struct ip_tunnel *t;
690
691 switch (cmd) {
692 case SIOCGETTUNNEL:
693 t = NULL;
694 if (dev == ipip_fb_tunnel_dev) {
695 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
696 err = -EFAULT;
697 break;
698 }
699 t = ipip_tunnel_locate(&p, 0);
700 }
701 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800702 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 memcpy(&p, &t->parms, sizeof(p));
704 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
705 err = -EFAULT;
706 break;
707
708 case SIOCADDTUNNEL:
709 case SIOCCHGTUNNEL:
710 err = -EPERM;
711 if (!capable(CAP_NET_ADMIN))
712 goto done;
713
714 err = -EFAULT;
715 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
716 goto done;
717
718 err = -EINVAL;
719 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
720 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
721 goto done;
722 if (p.iph.ttl)
723 p.iph.frag_off |= htons(IP_DF);
724
725 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
726
727 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
728 if (t != NULL) {
729 if (t->dev != dev) {
730 err = -EEXIST;
731 break;
732 }
733 } else {
734 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
735 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
736 err = -EINVAL;
737 break;
738 }
Patrick McHardy2941a482006-01-08 22:05:26 -0800739 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 ipip_tunnel_unlink(t);
741 t->parms.iph.saddr = p.iph.saddr;
742 t->parms.iph.daddr = p.iph.daddr;
743 memcpy(dev->dev_addr, &p.iph.saddr, 4);
744 memcpy(dev->broadcast, &p.iph.daddr, 4);
745 ipip_tunnel_link(t);
746 netdev_state_change(dev);
747 }
748 }
749
750 if (t) {
751 err = 0;
752 if (cmd == SIOCCHGTUNNEL) {
753 t->parms.iph.ttl = p.iph.ttl;
754 t->parms.iph.tos = p.iph.tos;
755 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidt55339952007-12-12 11:01:43 -0800756 if (t->parms.link != p.link) {
757 t->parms.link = p.link;
758 ipip_tunnel_bind_dev(dev);
759 netdev_state_change(dev);
760 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 }
762 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
763 err = -EFAULT;
764 } else
765 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
766 break;
767
768 case SIOCDELTUNNEL:
769 err = -EPERM;
770 if (!capable(CAP_NET_ADMIN))
771 goto done;
772
773 if (dev == ipip_fb_tunnel_dev) {
774 err = -EFAULT;
775 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
776 goto done;
777 err = -ENOENT;
778 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
779 goto done;
780 err = -EPERM;
781 if (t->dev == ipip_fb_tunnel_dev)
782 goto done;
783 dev = t->dev;
784 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800785 unregister_netdevice(dev);
786 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 break;
788
789 default:
790 err = -EINVAL;
791 }
792
793done:
794 return err;
795}
796
797static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
798{
Patrick McHardy2941a482006-01-08 22:05:26 -0800799 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800}
801
802static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
803{
804 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
805 return -EINVAL;
806 dev->mtu = new_mtu;
807 return 0;
808}
809
810static void ipip_tunnel_setup(struct net_device *dev)
811{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 dev->uninit = ipip_tunnel_uninit;
813 dev->hard_start_xmit = ipip_tunnel_xmit;
814 dev->get_stats = ipip_tunnel_get_stats;
815 dev->do_ioctl = ipip_tunnel_ioctl;
816 dev->change_mtu = ipip_tunnel_change_mtu;
817 dev->destructor = free_netdev;
818
819 dev->type = ARPHRD_TUNNEL;
820 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800821 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 dev->flags = IFF_NOARP;
823 dev->iflink = 0;
824 dev->addr_len = 4;
825}
826
827static int ipip_tunnel_init(struct net_device *dev)
828{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829 struct ip_tunnel *tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830
Patrick McHardy2941a482006-01-08 22:05:26 -0800831 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832
833 tunnel->dev = dev;
834 strcpy(tunnel->parms.name, dev->name);
835
836 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
837 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
838
Michal Schmidt55339952007-12-12 11:01:43 -0800839 ipip_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
841 return 0;
842}
843
844static int __init ipip_fb_tunnel_init(struct net_device *dev)
845{
Patrick McHardy2941a482006-01-08 22:05:26 -0800846 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 struct iphdr *iph = &tunnel->parms.iph;
848
849 tunnel->dev = dev;
850 strcpy(tunnel->parms.name, dev->name);
851
852 iph->version = 4;
853 iph->protocol = IPPROTO_IPIP;
854 iph->ihl = 5;
855
856 dev_hold(dev);
857 tunnels_wc[0] = tunnel;
858 return 0;
859}
860
861static struct xfrm_tunnel ipip_handler = {
862 .handler = ipip_rcv,
863 .err_handler = ipip_err,
Herbert Xud2acc342006-03-28 01:12:13 -0800864 .priority = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865};
866
867static char banner[] __initdata =
868 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
869
870static int __init ipip_init(void)
871{
872 int err;
873
874 printk(banner);
875
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800876 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 printk(KERN_INFO "ipip init: can't register tunnel\n");
878 return -EAGAIN;
879 }
880
881 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
882 "tunl0",
883 ipip_tunnel_setup);
884 if (!ipip_fb_tunnel_dev) {
885 err = -ENOMEM;
886 goto err1;
887 }
888
889 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
890
891 if ((err = register_netdev(ipip_fb_tunnel_dev)))
892 goto err2;
893 out:
894 return err;
895 err2:
896 free_netdev(ipip_fb_tunnel_dev);
897 err1:
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800898 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 goto out;
900}
901
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700902static void __exit ipip_destroy_tunnels(void)
903{
904 int prio;
905
906 for (prio = 1; prio < 4; prio++) {
907 int h;
908 for (h = 0; h < HASH_SIZE; h++) {
909 struct ip_tunnel *t;
910 while ((t = tunnels[prio][h]) != NULL)
911 unregister_netdevice(t->dev);
912 }
913 }
914}
915
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916static void __exit ipip_fini(void)
917{
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800918 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
920
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700921 rtnl_lock();
922 ipip_destroy_tunnels();
923 unregister_netdevice(ipip_fb_tunnel_dev);
924 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925}
926
927module_init(ipip_init);
928module_exit(ipip_fini);
929MODULE_LICENSE("GPL");