blob: 160535b5170549e77e12d8e2c3ccb5737f67e01a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: IP/IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090038
Linus Torvalds1da177e2005-04-16 15:20:36 -070039 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090045
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090055
Linus Torvalds1da177e2005-04-16 15:20:36 -070056*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090078 find out how much more space you can allocate by calling
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */
94
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090095
Randy Dunlap4fc268d2006-01-11 12:17:47 -080096#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <linux/module.h>
98#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800110#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
118
/* Number of buckets in each address-keyed tunnel table; must stay a power of two. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 *
 * Every use of the argument is parenthesized so that a compound expression
 * (e.g. HASH(a | b)) is cast and shifted as a whole, and the mask is derived
 * from HASH_SIZE so the two definitions cannot drift apart.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
122static int ipip_fb_tunnel_init(struct net_device *dev);
123static int ipip_tunnel_init(struct net_device *dev);
124static void ipip_tunnel_setup(struct net_device *dev);
125
126static struct net_device *ipip_fb_tunnel_dev;
127
128static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129static struct ip_tunnel *tunnels_r[HASH_SIZE];
130static struct ip_tunnel *tunnels_l[HASH_SIZE];
131static struct ip_tunnel *tunnels_wc[1];
132static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133
134static DEFINE_RWLOCK(ipip_lock);
135
Al Virod5a0a1e2006-11-08 00:23:14 -0800136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161{
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
Al Virod5a0a1e2006-11-08 00:23:14 -0800209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 char name[IFNAMSIZ];
214
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
224 else {
225 int i;
226 for (i=1; i<100; i++) {
227 sprintf(name, "tunl%d", i);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700228 if (__dev_get_by_name(&init_net, name) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 break;
230 }
231 if (i==100)
232 goto failed;
233 }
234
235 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
236 if (dev == NULL)
237 return NULL;
238
Patrick McHardy2941a482006-01-08 22:05:26 -0800239 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 dev->init = ipip_tunnel_init;
241 nt->parms = *parms;
242
243 if (register_netdevice(dev) < 0) {
244 free_netdev(dev);
245 goto failed;
246 }
247
248 dev_hold(dev);
249 ipip_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 return nt;
251
252failed:
253 return NULL;
254}
255
256static void ipip_tunnel_uninit(struct net_device *dev)
257{
258 if (dev == ipip_fb_tunnel_dev) {
259 write_lock_bh(&ipip_lock);
260 tunnels_wc[0] = NULL;
261 write_unlock_bh(&ipip_lock);
262 } else
Patrick McHardy2941a482006-01-08 22:05:26 -0800263 ipip_tunnel_unlink(netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 dev_put(dev);
265}
266
Herbert Xud2acc342006-03-28 01:12:13 -0800267static int ipip_err(struct sk_buff *skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268{
269#ifndef I_WISH_WORLD_WERE_PERFECT
270
271/* It is not :-( All the routers (except for Linux) return only
272 8 bytes of packet payload. It means, that precise relaying of
273 ICMP in the real Internet is absolutely infeasible.
274 */
275 struct iphdr *iph = (struct iphdr*)skb->data;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300276 const int type = icmp_hdr(skb)->type;
277 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 struct ip_tunnel *t;
Herbert Xud2acc342006-03-28 01:12:13 -0800279 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
281 switch (type) {
282 default:
283 case ICMP_PARAMETERPROB:
Herbert Xud2acc342006-03-28 01:12:13 -0800284 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285
286 case ICMP_DEST_UNREACH:
287 switch (code) {
288 case ICMP_SR_FAILED:
289 case ICMP_PORT_UNREACH:
290 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800291 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 case ICMP_FRAG_NEEDED:
293 /* Soft state for pmtu is maintained by IP core. */
Herbert Xud2acc342006-03-28 01:12:13 -0800294 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 default:
296 /* All others are translated to HOST_UNREACH.
297 rfc2003 contains "deep thoughts" about NET_UNREACH,
298 I believe they are just ether pollution. --ANK
299 */
300 break;
301 }
302 break;
303 case ICMP_TIME_EXCEEDED:
304 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800305 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 break;
307 }
308
Herbert Xud2acc342006-03-28 01:12:13 -0800309 err = -ENOENT;
310
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 read_lock(&ipip_lock);
312 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
313 if (t == NULL || t->parms.iph.daddr == 0)
314 goto out;
Herbert Xud2acc342006-03-28 01:12:13 -0800315
316 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
318 goto out;
319
320 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
321 t->err_count++;
322 else
323 t->err_count = 1;
324 t->err_time = jiffies;
325out:
326 read_unlock(&ipip_lock);
Herbert Xud2acc342006-03-28 01:12:13 -0800327 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328#else
329 struct iphdr *iph = (struct iphdr*)dp;
330 int hlen = iph->ihl<<2;
331 struct iphdr *eiph;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300332 const int type = icmp_hdr(skb)->type;
333 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 int rel_type = 0;
335 int rel_code = 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700336 __be32 rel_info = 0;
337 __u32 n = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 struct sk_buff *skb2;
339 struct flowi fl;
340 struct rtable *rt;
341
342 if (len < hlen + sizeof(struct iphdr))
Herbert Xud2acc342006-03-28 01:12:13 -0800343 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 eiph = (struct iphdr*)(dp + hlen);
345
346 switch (type) {
347 default:
Herbert Xud2acc342006-03-28 01:12:13 -0800348 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 case ICMP_PARAMETERPROB:
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300350 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
Al Viroc55e2f42006-09-19 13:23:19 -0700351 if (n < hlen)
Herbert Xud2acc342006-03-28 01:12:13 -0800352 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
354 /* So... This guy found something strange INSIDE encapsulated
355 packet. Well, he is fool, but what can we do ?
356 */
357 rel_type = ICMP_PARAMETERPROB;
Al Viroc55e2f42006-09-19 13:23:19 -0700358 rel_info = htonl((n - hlen) << 24);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 break;
360
361 case ICMP_DEST_UNREACH:
362 switch (code) {
363 case ICMP_SR_FAILED:
364 case ICMP_PORT_UNREACH:
365 /* Impossible event. */
Herbert Xud2acc342006-03-28 01:12:13 -0800366 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 case ICMP_FRAG_NEEDED:
368 /* And it is the only really necessary thing :-) */
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300369 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
Al Viroc55e2f42006-09-19 13:23:19 -0700370 if (n < hlen+68)
Herbert Xud2acc342006-03-28 01:12:13 -0800371 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700372 n -= hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
Al Viroc55e2f42006-09-19 13:23:19 -0700374 if (n > ntohs(eiph->tot_len))
Herbert Xud2acc342006-03-28 01:12:13 -0800375 return 0;
Al Viroc55e2f42006-09-19 13:23:19 -0700376 rel_info = htonl(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 break;
378 default:
379 /* All others are translated to HOST_UNREACH.
380 rfc2003 contains "deep thoughts" about NET_UNREACH,
381 I believe, it is just ether pollution. --ANK
382 */
383 rel_type = ICMP_DEST_UNREACH;
384 rel_code = ICMP_HOST_UNREACH;
385 break;
386 }
387 break;
388 case ICMP_TIME_EXCEEDED:
389 if (code != ICMP_EXC_TTL)
Herbert Xud2acc342006-03-28 01:12:13 -0800390 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 break;
392 }
393
394 /* Prepare fake skb to feed it to icmp_send */
395 skb2 = skb_clone(skb, GFP_ATOMIC);
396 if (skb2 == NULL)
Herbert Xud2acc342006-03-28 01:12:13 -0800397 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 dst_release(skb2->dst);
399 skb2->dst = NULL;
400 skb_pull(skb2, skb->data - (u8*)eiph);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700401 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
403 /* Try to guess incoming interface */
404 memset(&fl, 0, sizeof(fl));
405 fl.fl4_daddr = eiph->saddr;
406 fl.fl4_tos = RT_TOS(eiph->tos);
407 fl.proto = IPPROTO_IPIP;
408 if (ip_route_output_key(&rt, &key)) {
409 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800410 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 }
412 skb2->dev = rt->u.dst.dev;
413
414 /* route "incoming" packet */
415 if (rt->rt_flags&RTCF_LOCAL) {
416 ip_rt_put(rt);
417 rt = NULL;
418 fl.fl4_daddr = eiph->daddr;
419 fl.fl4_src = eiph->saddr;
420 fl.fl4_tos = eiph->tos;
421 if (ip_route_output_key(&rt, &fl) ||
422 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
423 ip_rt_put(rt);
424 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800425 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 }
427 } else {
428 ip_rt_put(rt);
429 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
430 skb2->dst->dev->type != ARPHRD_TUNNEL) {
431 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800432 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
434 }
435
436 /* change mtu on this route */
437 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
Al Viroc55e2f42006-09-19 13:23:19 -0700438 if (n > dst_mtu(skb2->dst)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800440 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 }
Al Viroc55e2f42006-09-19 13:23:19 -0700442 skb2->dst->ops->update_pmtu(skb2->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 } else if (type == ICMP_TIME_EXCEEDED) {
Patrick McHardy2941a482006-01-08 22:05:26 -0800444 struct ip_tunnel *t = netdev_priv(skb2->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 if (t->parms.iph.ttl) {
446 rel_type = ICMP_DEST_UNREACH;
447 rel_code = ICMP_HOST_UNREACH;
448 }
449 }
450
451 icmp_send(skb2, rel_type, rel_code, rel_info);
452 kfree_skb(skb2);
Herbert Xud2acc342006-03-28 01:12:13 -0800453 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454#endif
455}
456
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700457static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
458 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700460 struct iphdr *inner_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
462 if (INET_ECN_is_ce(outer_iph->tos))
463 IP_ECN_set_ce(inner_iph);
464}
465
466static int ipip_rcv(struct sk_buff *skb)
467{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 struct ip_tunnel *tunnel;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700469 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
471 read_lock(&ipip_lock);
472 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
473 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
474 read_unlock(&ipip_lock);
475 kfree_skb(skb);
476 return 0;
477 }
478
479 secpath_reset(skb);
480
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700481 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700482 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 skb->protocol = htons(ETH_P_IP);
484 skb->pkt_type = PACKET_HOST;
485
486 tunnel->stat.rx_packets++;
487 tunnel->stat.rx_bytes += skb->len;
488 skb->dev = tunnel->dev;
489 dst_release(skb->dst);
490 skb->dst = NULL;
491 nf_reset(skb);
492 ipip_ecn_decapsulate(iph, skb);
493 netif_rx(skb);
494 read_unlock(&ipip_lock);
495 return 0;
496 }
497 read_unlock(&ipip_lock);
498
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 return -1;
500}
501
502/*
503 * This function assumes it is being called from dev_queue_xmit()
504 * and that skb is filled properly by that function.
505 */
506
507static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
508{
Patrick McHardy2941a482006-01-08 22:05:26 -0800509 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 struct net_device_stats *stats = &tunnel->stat;
511 struct iphdr *tiph = &tunnel->parms.iph;
512 u8 tos = tunnel->parms.iph.tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800513 __be16 df = tiph->frag_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 struct rtable *rt; /* Route to the other host */
515 struct net_device *tdev; /* Device to other host */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700516 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700518 unsigned int max_headroom; /* The extra header space needed */
Al Virod5a0a1e2006-11-08 00:23:14 -0800519 __be32 dst = tiph->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 int mtu;
521
522 if (tunnel->recursion++) {
523 tunnel->stat.collisions++;
524 goto tx_error;
525 }
526
527 if (skb->protocol != htons(ETH_P_IP))
528 goto tx_error;
529
530 if (tos&1)
531 tos = old_iph->tos;
532
533 if (!dst) {
534 /* NBMA tunnel */
535 if ((rt = (struct rtable*)skb->dst) == NULL) {
536 tunnel->stat.tx_fifo_errors++;
537 goto tx_error;
538 }
539 if ((dst = rt->rt_gateway) == 0)
540 goto tx_error_icmp;
541 }
542
543 {
544 struct flowi fl = { .oif = tunnel->parms.link,
545 .nl_u = { .ip4_u =
546 { .daddr = dst,
547 .saddr = tiph->saddr,
548 .tos = RT_TOS(tos) } },
549 .proto = IPPROTO_IPIP };
550 if (ip_route_output_key(&rt, &fl)) {
551 tunnel->stat.tx_carrier_errors++;
552 goto tx_error_icmp;
553 }
554 }
555 tdev = rt->u.dst.dev;
556
557 if (tdev == dev) {
558 ip_rt_put(rt);
559 tunnel->stat.collisions++;
560 goto tx_error;
561 }
562
563 if (tiph->frag_off)
564 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
565 else
566 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
567
568 if (mtu < 68) {
569 tunnel->stat.collisions++;
570 ip_rt_put(rt);
571 goto tx_error;
572 }
573 if (skb->dst)
574 skb->dst->ops->update_pmtu(skb->dst, mtu);
575
576 df |= (old_iph->frag_off&htons(IP_DF));
577
578 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
579 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
580 ip_rt_put(rt);
581 goto tx_error;
582 }
583
584 if (tunnel->err_count > 0) {
585 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
586 tunnel->err_count--;
587 dst_link_failure(skb);
588 } else
589 tunnel->err_count = 0;
590 }
591
592 /*
593 * Okay, now see if we can stuff it in the buffer as-is.
594 */
595 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
596
Patrick McHardycfbba492007-07-09 15:33:40 -0700597 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
598 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
600 if (!new_skb) {
601 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900602 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 dev_kfree_skb(skb);
604 tunnel->recursion--;
605 return 0;
606 }
607 if (skb->sk)
608 skb_set_owner_w(new_skb, skb->sk);
609 dev_kfree_skb(skb);
610 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700611 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 }
613
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700614 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700615 skb_push(skb, sizeof(struct iphdr));
616 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800618 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
619 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 dst_release(skb->dst);
621 skb->dst = &rt->u.dst;
622
623 /*
624 * Push down and install the IPIP header.
625 */
626
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700627 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 iph->version = 4;
629 iph->ihl = sizeof(struct iphdr)>>2;
630 iph->frag_off = df;
631 iph->protocol = IPPROTO_IPIP;
632 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
633 iph->daddr = rt->rt_dst;
634 iph->saddr = rt->rt_src;
635
636 if ((iph->ttl = tiph->ttl) == 0)
637 iph->ttl = old_iph->ttl;
638
639 nf_reset(skb);
640
641 IPTUNNEL_XMIT();
642 tunnel->recursion--;
643 return 0;
644
645tx_error_icmp:
646 dst_link_failure(skb);
647tx_error:
648 stats->tx_errors++;
649 dev_kfree_skb(skb);
650 tunnel->recursion--;
651 return 0;
652}
653
Michal Schmidt55339952007-12-12 11:01:43 -0800654static void ipip_tunnel_bind_dev(struct net_device *dev)
655{
656 struct net_device *tdev = NULL;
657 struct ip_tunnel *tunnel;
658 struct iphdr *iph;
659
660 tunnel = netdev_priv(dev);
661 iph = &tunnel->parms.iph;
662
663 if (iph->daddr) {
664 struct flowi fl = { .oif = tunnel->parms.link,
665 .nl_u = { .ip4_u =
666 { .daddr = iph->daddr,
667 .saddr = iph->saddr,
668 .tos = RT_TOS(iph->tos) } },
669 .proto = IPPROTO_IPIP };
670 struct rtable *rt;
671 if (!ip_route_output_key(&rt, &fl)) {
672 tdev = rt->u.dst.dev;
673 ip_rt_put(rt);
674 }
675 dev->flags |= IFF_POINTOPOINT;
676 }
677
678 if (!tdev && tunnel->parms.link)
679 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
680
681 if (tdev) {
682 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
683 dev->mtu = tdev->mtu - sizeof(struct iphdr);
684 }
685 dev->iflink = tunnel->parms.link;
686}
687
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688static int
689ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
690{
691 int err = 0;
692 struct ip_tunnel_parm p;
693 struct ip_tunnel *t;
694
695 switch (cmd) {
696 case SIOCGETTUNNEL:
697 t = NULL;
698 if (dev == ipip_fb_tunnel_dev) {
699 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
700 err = -EFAULT;
701 break;
702 }
703 t = ipip_tunnel_locate(&p, 0);
704 }
705 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800706 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 memcpy(&p, &t->parms, sizeof(p));
708 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
709 err = -EFAULT;
710 break;
711
712 case SIOCADDTUNNEL:
713 case SIOCCHGTUNNEL:
714 err = -EPERM;
715 if (!capable(CAP_NET_ADMIN))
716 goto done;
717
718 err = -EFAULT;
719 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
720 goto done;
721
722 err = -EINVAL;
723 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
724 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
725 goto done;
726 if (p.iph.ttl)
727 p.iph.frag_off |= htons(IP_DF);
728
729 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
730
731 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
732 if (t != NULL) {
733 if (t->dev != dev) {
734 err = -EEXIST;
735 break;
736 }
737 } else {
738 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
739 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
740 err = -EINVAL;
741 break;
742 }
Patrick McHardy2941a482006-01-08 22:05:26 -0800743 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 ipip_tunnel_unlink(t);
745 t->parms.iph.saddr = p.iph.saddr;
746 t->parms.iph.daddr = p.iph.daddr;
747 memcpy(dev->dev_addr, &p.iph.saddr, 4);
748 memcpy(dev->broadcast, &p.iph.daddr, 4);
749 ipip_tunnel_link(t);
750 netdev_state_change(dev);
751 }
752 }
753
754 if (t) {
755 err = 0;
756 if (cmd == SIOCCHGTUNNEL) {
757 t->parms.iph.ttl = p.iph.ttl;
758 t->parms.iph.tos = p.iph.tos;
759 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidt55339952007-12-12 11:01:43 -0800760 if (t->parms.link != p.link) {
761 t->parms.link = p.link;
762 ipip_tunnel_bind_dev(dev);
763 netdev_state_change(dev);
764 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 }
766 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
767 err = -EFAULT;
768 } else
769 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
770 break;
771
772 case SIOCDELTUNNEL:
773 err = -EPERM;
774 if (!capable(CAP_NET_ADMIN))
775 goto done;
776
777 if (dev == ipip_fb_tunnel_dev) {
778 err = -EFAULT;
779 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
780 goto done;
781 err = -ENOENT;
782 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
783 goto done;
784 err = -EPERM;
785 if (t->dev == ipip_fb_tunnel_dev)
786 goto done;
787 dev = t->dev;
788 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800789 unregister_netdevice(dev);
790 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 break;
792
793 default:
794 err = -EINVAL;
795 }
796
797done:
798 return err;
799}
800
801static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
802{
Patrick McHardy2941a482006-01-08 22:05:26 -0800803 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804}
805
806static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
807{
808 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
809 return -EINVAL;
810 dev->mtu = new_mtu;
811 return 0;
812}
813
814static void ipip_tunnel_setup(struct net_device *dev)
815{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 dev->uninit = ipip_tunnel_uninit;
817 dev->hard_start_xmit = ipip_tunnel_xmit;
818 dev->get_stats = ipip_tunnel_get_stats;
819 dev->do_ioctl = ipip_tunnel_ioctl;
820 dev->change_mtu = ipip_tunnel_change_mtu;
821 dev->destructor = free_netdev;
822
823 dev->type = ARPHRD_TUNNEL;
824 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800825 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 dev->flags = IFF_NOARP;
827 dev->iflink = 0;
828 dev->addr_len = 4;
829}
830
831static int ipip_tunnel_init(struct net_device *dev)
832{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 struct ip_tunnel *tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
Patrick McHardy2941a482006-01-08 22:05:26 -0800835 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836
837 tunnel->dev = dev;
838 strcpy(tunnel->parms.name, dev->name);
839
840 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
841 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
842
Michal Schmidt55339952007-12-12 11:01:43 -0800843 ipip_tunnel_bind_dev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844
845 return 0;
846}
847
848static int __init ipip_fb_tunnel_init(struct net_device *dev)
849{
Patrick McHardy2941a482006-01-08 22:05:26 -0800850 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 struct iphdr *iph = &tunnel->parms.iph;
852
853 tunnel->dev = dev;
854 strcpy(tunnel->parms.name, dev->name);
855
856 iph->version = 4;
857 iph->protocol = IPPROTO_IPIP;
858 iph->ihl = 5;
859
860 dev_hold(dev);
861 tunnels_wc[0] = tunnel;
862 return 0;
863}
864
865static struct xfrm_tunnel ipip_handler = {
866 .handler = ipip_rcv,
867 .err_handler = ipip_err,
Herbert Xud2acc342006-03-28 01:12:13 -0800868 .priority = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869};
870
871static char banner[] __initdata =
872 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
873
874static int __init ipip_init(void)
875{
876 int err;
877
878 printk(banner);
879
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800880 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 printk(KERN_INFO "ipip init: can't register tunnel\n");
882 return -EAGAIN;
883 }
884
885 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
886 "tunl0",
887 ipip_tunnel_setup);
888 if (!ipip_fb_tunnel_dev) {
889 err = -ENOMEM;
890 goto err1;
891 }
892
893 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
894
895 if ((err = register_netdev(ipip_fb_tunnel_dev)))
896 goto err2;
897 out:
898 return err;
899 err2:
900 free_netdev(ipip_fb_tunnel_dev);
901 err1:
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800902 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 goto out;
904}
905
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700906static void __exit ipip_destroy_tunnels(void)
907{
908 int prio;
909
910 for (prio = 1; prio < 4; prio++) {
911 int h;
912 for (h = 0; h < HASH_SIZE; h++) {
913 struct ip_tunnel *t;
914 while ((t = tunnels[prio][h]) != NULL)
915 unregister_netdevice(t->dev);
916 }
917 }
918}
919
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920static void __exit ipip_fini(void)
921{
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800922 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
924
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700925 rtnl_lock();
926 ipip_destroy_tunnels();
927 unregister_netdevice(ipip_fb_tunnel_dev);
928 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929}
930
931module_init(ipip_init);
932module_exit(ipip_fini);
933MODULE_LICENSE("GPL");