blob: 8c2b2b0741daeb4ac118ea39721ecaab19ad00fb [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: IP/IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29/* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090038
Linus Torvalds1da177e2005-04-16 15:20:36 -070039 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090045
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090055
Linus Torvalds1da177e2005-04-16 15:20:36 -070056*/
57
58/* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090078 find out how much more space you can allocate by calling
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
94
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090095
Randy Dunlap4fc268d2006-01-11 12:17:47 -080096#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <linux/module.h>
98#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/kernel.h>
100#include <asm/uaccess.h>
101#include <linux/skbuff.h>
102#include <linux/netdevice.h>
103#include <linux/in.h>
104#include <linux/tcp.h>
105#include <linux/udp.h>
106#include <linux/if_arp.h>
107#include <linux/mroute.h>
108#include <linux/init.h>
109#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800110#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/ipip.h>
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
118
/* Number of buckets in each hashed tunnel table; must stay a power of two
 * because HASH() masks with (HASH_SIZE - 1).
 */
#define HASH_SIZE  16

/*
 * Fold an address into a bucket index in [0, HASH_SIZE): the value is XORed
 * with itself shifted right by four bits and the low bits are kept.
 *
 * The argument is fully parenthesized so that a compound expression is cast
 * and shifted as a whole.  Note that it is still evaluated twice, so it must
 * not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
/* Forward declarations of the net_device callbacks defined further down. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback device ("tunl0"), allocated in ipip_init(). */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel lookup tables, one per combination of configured endpoints:
 *   tunnels_r_l - remote and local address set, bucket HASH(remote)^HASH(local)
 *   tunnels_r   - only remote address set, bucket HASH(remote)
 *   tunnels_l   - only local address set, bucket HASH(local)
 *   tunnels_wc  - neither set; single slot, filled by ipip_fb_tunnel_init()
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
/* Indexed by the "prio" bitmask built in __ipip_bucket(): bit 1 = remote set, bit 0 = local set. */
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/*
 * Write-locked around every update of a bucket chain or of tunnels_wc[0];
 * read-locked around the lookups in ipip_err() and ipip_rcv().
 */
static DEFINE_RWLOCK(ipip_lock);
135
Al Virod5a0a1e2006-11-08 00:23:14 -0800136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
140 struct ip_tunnel *t;
141
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t;
146 }
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t;
150 }
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t;
154 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t;
157 return NULL;
158}
159
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161{
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 unsigned h = 0;
165 int prio = 0;
166
167 if (remote) {
168 prio |= 2;
169 h ^= HASH(remote);
170 }
171 if (local) {
172 prio |= 1;
173 h ^= HASH(local);
174 }
175 return &tunnels[prio][h];
176}
177
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179{
180 return __ipip_bucket(&t->parms);
181}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183static void ipip_tunnel_unlink(struct ip_tunnel *t)
184{
185 struct ip_tunnel **tp;
186
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) {
189 write_lock_bh(&ipip_lock);
190 *tp = t->next;
191 write_unlock_bh(&ipip_lock);
192 break;
193 }
194 }
195}
196
197static void ipip_tunnel_link(struct ip_tunnel *t)
198{
199 struct ip_tunnel **tp = ipip_bucket(t);
200
201 t->next = *tp;
202 write_lock_bh(&ipip_lock);
203 *tp = t;
204 write_unlock_bh(&ipip_lock);
205}
206
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208{
Al Virod5a0a1e2006-11-08 00:23:14 -0800209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 char name[IFNAMSIZ];
214
YOSHIFUJI Hideaki87d1a162007-04-24 20:44:47 +0900215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t;
218 }
219 if (!create)
220 return NULL;
221
222 if (parms->name[0])
223 strlcpy(name, parms->name, IFNAMSIZ);
224 else {
225 int i;
226 for (i=1; i<100; i++) {
227 sprintf(name, "tunl%d", i);
Eric W. Biederman881d9662007-09-17 11:56:21 -0700228 if (__dev_get_by_name(&init_net, name) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 break;
230 }
231 if (i==100)
232 goto failed;
233 }
234
235 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
236 if (dev == NULL)
237 return NULL;
238
Patrick McHardy2941a482006-01-08 22:05:26 -0800239 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 dev->init = ipip_tunnel_init;
241 nt->parms = *parms;
242
243 if (register_netdevice(dev) < 0) {
244 free_netdev(dev);
245 goto failed;
246 }
247
248 dev_hold(dev);
249 ipip_tunnel_link(nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 return nt;
251
252failed:
253 return NULL;
254}
255
256static void ipip_tunnel_uninit(struct net_device *dev)
257{
258 if (dev == ipip_fb_tunnel_dev) {
259 write_lock_bh(&ipip_lock);
260 tunnels_wc[0] = NULL;
261 write_unlock_bh(&ipip_lock);
262 } else
Patrick McHardy2941a482006-01-08 22:05:26 -0800263 ipip_tunnel_unlink(netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 dev_put(dev);
265}
266
/*
 * ICMP error handler, installed as ipip_handler.err_handler.
 *
 * Two implementations are selected by the preprocessor.  The first one
 * (built unless I_WISH_WORLD_WERE_PERFECT is defined) only records the
 * error on the matching tunnel: it bumps t->err_count and stamps
 * t->err_time, which ipip_tunnel_xmit() later consumes.
 *
 * Return value of the first implementation:
 *   0        the ICMP type/code is ignored, or the error was recorded (or
 *            deliberately skipped for TIME_EXCEEDED on a tunnel whose
 *            configured ttl is 0);
 *   -ENOENT  no tunnel matches, or the matching tunnel has no remote
 *            address configured.
 *
 * @info is not used by either implementation.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	/* NOTE(review): skb->data is presumably the IP header quoted inside
	 * the ICMP error, i.e. the outer header we transmitted - confirm.
	 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Filter: only some DEST_UNREACH codes and TIME_EXCEEDED/EXC_TTL
	 * fall through to the bookkeeping below; everything else returns 0.
	 */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	/* Arguments are (remote, local): the quoted header's destination is
	 * looked up as the tunnel's remote end, its source as the local end.
	 */
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* Tunnel has no fixed ttl configured: do not count TTL expiry. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Errors arriving within IPTUNNEL_ERR_TIMEO of the previous one
	 * accumulate; otherwise the counter restarts at 1.
	 */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	/*
	 * Alternative implementation that tries to relay the ICMP error to
	 * the sender of the encapsulated packet via icmp_send().
	 *
	 * NOTE(review): this branch refers to `dp`, `len` and `key`, none of
	 * which is declared in this function, so it cannot be built as it
	 * stands (the flow key declared here is `fl`).
	 */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return 0;
	/* Encapsulated (inner) IP header follows the outer one. */
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		/* Top byte of the word is used as the offset of the problem. */
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < hlen)
			return 0;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		/* Rebase the offset onto the inner packet. */
		rel_info = htonl((n - hlen) << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < hlen+68)
				return 0;
			/* MTU as seen by the inner packet. */
			n -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return 0;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&rt, &key)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		/* Give up unless the inner packet routes out a tunnel device. */
		if (ip_route_output_key(&rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		/* With a fixed tunnel ttl, report HOST_UNREACH instead. */
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}
456
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700457static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
458 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700460 struct iphdr *inner_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461
462 if (INET_ECN_is_ce(outer_iph->tos))
463 IP_ECN_set_ce(inner_iph);
464}
465
466static int ipip_rcv(struct sk_buff *skb)
467{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 struct ip_tunnel *tunnel;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700469 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
471 read_lock(&ipip_lock);
472 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
473 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
474 read_unlock(&ipip_lock);
475 kfree_skb(skb);
476 return 0;
477 }
478
479 secpath_reset(skb);
480
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700481 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700482 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 skb->protocol = htons(ETH_P_IP);
484 skb->pkt_type = PACKET_HOST;
485
486 tunnel->stat.rx_packets++;
487 tunnel->stat.rx_bytes += skb->len;
488 skb->dev = tunnel->dev;
489 dst_release(skb->dst);
490 skb->dst = NULL;
491 nf_reset(skb);
492 ipip_ecn_decapsulate(iph, skb);
493 netif_rx(skb);
494 read_unlock(&ipip_lock);
495 return 0;
496 }
497 read_unlock(&ipip_lock);
498
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 return -1;
500}
501
/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 *
 *	hard_start_xmit callback of a tunnel device (see ipip_tunnel_setup()):
 *	prepends an outer IPv4 header built from the tunnel parameters and
 *	the route to the remote endpoint, then hands the packet on through
 *	IPTUNNEL_XMIT().
 *
 *	Always returns 0.  On every error path the skb is freed here and a
 *	counter in the tunnel's statistics is bumped.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;	/* configured header template */
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);	/* header of the packet being tunnelled */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	/* Re-entry guard: a non-zero counter means this tunnel is already
	 * in the middle of a transmit.
	 */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Only IPv4 payloads are tunnelled. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Low bit of the configured tos selects "copy tos from inner packet". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		/* No fixed remote: use the gateway of the packet's own route. */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		/* Route the outer packet to the tunnel endpoint. */
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* The route leads back into this very device: drop. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* With a frag_off template set, derive the inner MTU from the outer
	 * route; otherwise take it from the inner route or the device.
	 */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	/* Propagate the inner packet's DF bit to the outer header. */
	df |= (old_iph->frag_off&htons(IP_DF));

	/* Inner packet has DF set and does not fit: tell the sender. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Consume one ICMP error recorded by ipip_err(): while errors are
	 * recent, signal link failure for this packet; stale counts are reset.
	 */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		/* Not enough room or not privately writable: reallocate. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* The old pointer referred to the freed skb; refresh it. */
		old_iph = ip_hdr(skb);
	}

	/* Inner IP header becomes the transport header of the outer packet. */
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	/* Swap the inner route for the outer one; the reference obtained by
	 * ip_route_output_key() above is handed over to the skb here.
	 */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* Configured ttl of 0 means "inherit from the inner packet". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	/* NOTE(review): IPTUNNEL_XMIT() is defined outside this file and takes
	 * no arguments, so it presumably picks up locals of this function
	 * (skb, iph, rt, stats, ...) by name - confirm before renaming any.
	 */
	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
653
654static int
655ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
656{
657 int err = 0;
658 struct ip_tunnel_parm p;
659 struct ip_tunnel *t;
660
661 switch (cmd) {
662 case SIOCGETTUNNEL:
663 t = NULL;
664 if (dev == ipip_fb_tunnel_dev) {
665 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
666 err = -EFAULT;
667 break;
668 }
669 t = ipip_tunnel_locate(&p, 0);
670 }
671 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800672 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 memcpy(&p, &t->parms, sizeof(p));
674 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
675 err = -EFAULT;
676 break;
677
678 case SIOCADDTUNNEL:
679 case SIOCCHGTUNNEL:
680 err = -EPERM;
681 if (!capable(CAP_NET_ADMIN))
682 goto done;
683
684 err = -EFAULT;
685 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
686 goto done;
687
688 err = -EINVAL;
689 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
690 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
691 goto done;
692 if (p.iph.ttl)
693 p.iph.frag_off |= htons(IP_DF);
694
695 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
696
697 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
698 if (t != NULL) {
699 if (t->dev != dev) {
700 err = -EEXIST;
701 break;
702 }
703 } else {
704 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
705 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
706 err = -EINVAL;
707 break;
708 }
Patrick McHardy2941a482006-01-08 22:05:26 -0800709 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 ipip_tunnel_unlink(t);
711 t->parms.iph.saddr = p.iph.saddr;
712 t->parms.iph.daddr = p.iph.daddr;
713 memcpy(dev->dev_addr, &p.iph.saddr, 4);
714 memcpy(dev->broadcast, &p.iph.daddr, 4);
715 ipip_tunnel_link(t);
716 netdev_state_change(dev);
717 }
718 }
719
720 if (t) {
721 err = 0;
722 if (cmd == SIOCCHGTUNNEL) {
723 t->parms.iph.ttl = p.iph.ttl;
724 t->parms.iph.tos = p.iph.tos;
725 t->parms.iph.frag_off = p.iph.frag_off;
726 }
727 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
728 err = -EFAULT;
729 } else
730 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
731 break;
732
733 case SIOCDELTUNNEL:
734 err = -EPERM;
735 if (!capable(CAP_NET_ADMIN))
736 goto done;
737
738 if (dev == ipip_fb_tunnel_dev) {
739 err = -EFAULT;
740 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
741 goto done;
742 err = -ENOENT;
743 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
744 goto done;
745 err = -EPERM;
746 if (t->dev == ipip_fb_tunnel_dev)
747 goto done;
748 dev = t->dev;
749 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800750 unregister_netdevice(dev);
751 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 break;
753
754 default:
755 err = -EINVAL;
756 }
757
758done:
759 return err;
760}
761
762static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
763{
Patrick McHardy2941a482006-01-08 22:05:26 -0800764 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765}
766
767static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
768{
769 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
770 return -EINVAL;
771 dev->mtu = new_mtu;
772 return 0;
773}
774
775static void ipip_tunnel_setup(struct net_device *dev)
776{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 dev->uninit = ipip_tunnel_uninit;
778 dev->hard_start_xmit = ipip_tunnel_xmit;
779 dev->get_stats = ipip_tunnel_get_stats;
780 dev->do_ioctl = ipip_tunnel_ioctl;
781 dev->change_mtu = ipip_tunnel_change_mtu;
782 dev->destructor = free_netdev;
783
784 dev->type = ARPHRD_TUNNEL;
785 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
Kris Katterjohn46f25df2006-01-05 16:35:42 -0800786 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 dev->flags = IFF_NOARP;
788 dev->iflink = 0;
789 dev->addr_len = 4;
790}
791
792static int ipip_tunnel_init(struct net_device *dev)
793{
794 struct net_device *tdev = NULL;
795 struct ip_tunnel *tunnel;
796 struct iphdr *iph;
797
Patrick McHardy2941a482006-01-08 22:05:26 -0800798 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 iph = &tunnel->parms.iph;
800
801 tunnel->dev = dev;
802 strcpy(tunnel->parms.name, dev->name);
803
804 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
805 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
806
807 if (iph->daddr) {
808 struct flowi fl = { .oif = tunnel->parms.link,
809 .nl_u = { .ip4_u =
810 { .daddr = iph->daddr,
811 .saddr = iph->saddr,
812 .tos = RT_TOS(iph->tos) } },
813 .proto = IPPROTO_IPIP };
814 struct rtable *rt;
815 if (!ip_route_output_key(&rt, &fl)) {
816 tdev = rt->u.dst.dev;
817 ip_rt_put(rt);
818 }
819 dev->flags |= IFF_POINTOPOINT;
820 }
821
822 if (!tdev && tunnel->parms.link)
Eric W. Biederman881d9662007-09-17 11:56:21 -0700823 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824
825 if (tdev) {
826 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
827 dev->mtu = tdev->mtu - sizeof(struct iphdr);
828 }
829 dev->iflink = tunnel->parms.link;
830
831 return 0;
832}
833
834static int __init ipip_fb_tunnel_init(struct net_device *dev)
835{
Patrick McHardy2941a482006-01-08 22:05:26 -0800836 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 struct iphdr *iph = &tunnel->parms.iph;
838
839 tunnel->dev = dev;
840 strcpy(tunnel->parms.name, dev->name);
841
842 iph->version = 4;
843 iph->protocol = IPPROTO_IPIP;
844 iph->ihl = 5;
845
846 dev_hold(dev);
847 tunnels_wc[0] = tunnel;
848 return 0;
849}
850
/*
 * Handler descriptor registered with xfrm4_tunnel_register() in ipip_init()
 * and removed again in ipip_fini(): received packets go to ipip_rcv(),
 * ICMP errors to ipip_err().
 */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};
856
/* Message printed once by ipip_init(); __initdata because it is only used there. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
859
860static int __init ipip_init(void)
861{
862 int err;
863
864 printk(banner);
865
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800866 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 printk(KERN_INFO "ipip init: can't register tunnel\n");
868 return -EAGAIN;
869 }
870
871 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
872 "tunl0",
873 ipip_tunnel_setup);
874 if (!ipip_fb_tunnel_dev) {
875 err = -ENOMEM;
876 goto err1;
877 }
878
879 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
880
881 if ((err = register_netdev(ipip_fb_tunnel_dev)))
882 goto err2;
883 out:
884 return err;
885 err2:
886 free_netdev(ipip_fb_tunnel_dev);
887 err1:
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800888 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 goto out;
890}
891
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700892static void __exit ipip_destroy_tunnels(void)
893{
894 int prio;
895
896 for (prio = 1; prio < 4; prio++) {
897 int h;
898 for (h = 0; h < HASH_SIZE; h++) {
899 struct ip_tunnel *t;
900 while ((t = tunnels[prio][h]) != NULL)
901 unregister_netdevice(t->dev);
902 }
903 }
904}
905
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906static void __exit ipip_fini(void)
907{
Kazunori MIYAZAWAc0d56402007-02-13 12:54:47 -0800908 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
910
Alexey Kuznetsovdb445752005-07-30 17:46:44 -0700911 rtnl_lock();
912 ipip_destroy_tunnels();
913 unregister_netdevice(ipip_fb_tunnel_dev);
914 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915}
916
/* Module entry/exit points and licence declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");