blob: 940b7d2383ec44357764f3eb6095f650726ab92e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * IPv6 fragment reassembly
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003 * Linux INET6 implementation
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
8 * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
9 *
10 * Based on: net/ipv4/ip_fragment.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +090018/*
19 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070020 * Andi Kleen Make it work with multiple hosts.
21 * More RFC compliance.
22 *
23 * Horst von Brand Add missing #include <linux/string.h>
24 * Alexey Kuznetsov SMP races, threading, cleanup.
25 * Patrick McHardy LRU queue of frag heads for evictor.
26 * Mitsuru KANDA @USAGI Register inet6_protocol{}.
27 * David Stevens and
28 * YOSHIFUJI,H. @USAGI Always remove fragment header to
29 * calculate ICV correctly.
30 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/errno.h>
32#include <linux/types.h>
33#include <linux/string.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/jiffies.h>
37#include <linux/net.h>
38#include <linux/list.h>
39#include <linux/netdevice.h>
40#include <linux/in6.h>
41#include <linux/ipv6.h>
42#include <linux/icmpv6.h>
43#include <linux/random.h>
44#include <linux/jhash.h>
Herbert Xuf61944e2007-10-15 01:28:47 -070045#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#include <net/sock.h>
48#include <net/snmp.h>
49
50#include <net/ipv6.h>
YOSHIFUJI Hideakia11d2062006-11-04 20:11:37 +090051#include <net/ip6_route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/protocol.h>
53#include <net/transp_v6.h>
54#include <net/rawv6.h>
55#include <net/ndisc.h>
56#include <net/addrconf.h>
Pavel Emelyanov5ab11c92007-10-15 02:24:19 -070057#include <net/inet_frag.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
/* Per-fragment reassembly state, overlaid on skb->cb.  The inet6
 * control block must stay the first member so IP6CB() keeps working
 * on skbs that are sitting on a fragment queue. */
struct ip6frag_skb_cb
{
	struct inet6_skb_parm	h;
	int			offset;	/* payload offset of this fragment within the datagram */
};

#define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb))
67
/*
 *	Equivalent of ipv4 struct ipq
 */

struct frag_queue
{
	struct inet_frag_queue	q;	/* generic frag queue; must be first for container_of() */

	__be32			id;		/* fragment id		*/
	struct in6_addr		saddr;	/* source address of the datagram */
	struct in6_addr		daddr;	/* destination address of the datagram */

	int			iif;		/* ifindex of the device the last fragment arrived on */
	unsigned int		csum;	/* NOTE(review): appears unused in this file — verify other users */
	__u16			nhoffset;	/* offset of the nexthdr byte, taken from the first fragment */
};
84
/* Reassembly tunables, hooked into the generic inet_frags machinery.
 * high_thresh/low_thresh bound reassembly memory (the evictor runs
 * above high and trims down to low); timeout is in jiffies;
 * secret_interval is presumably the hash-rekey period — see the
 * ip6qhashfn locking comment. */
struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
	.high_thresh	 = 256 * 1024,
	.low_thresh	 = 192 * 1024,
	.timeout	 = IPV6_FRAG_TIMEOUT,
	.secret_interval = 10 * 60 * HZ,
};
91
Pavel Emelyanov7eb95152007-10-15 02:31:52 -070092static struct inet_frags ip6_frags;
Linus Torvalds1da177e2005-04-16 15:20:36 -070093
/* Number of IPv6 fragment queues currently in existence (for /proc stats). */
int ip6_frag_nqueues(void)
{
	return ip6_frags.nqueues;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
/* Memory currently consumed by IPv6 reassembly (for /proc stats). */
int ip6_frag_mem(void)
{
	return atomic_read(&ip6_frags.mem);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103
/* Forward declaration: ip6_frag_queue() calls this once all pieces arrived. */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
			  struct net_device *dev);
106
Zach Brownf6596f92006-04-10 16:05:34 -0700107/*
108 * callers should be careful not to use the hash value outside the ipfrag_lock
109 * as doing so could race with ipfrag_hash_rnd being recalculated.
110 */
Al Viroe69a4ad2006-11-14 20:56:00 -0800111static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 struct in6_addr *daddr)
113{
114 u32 a, b, c;
115
Al Viroe69a4ad2006-11-14 20:56:00 -0800116 a = (__force u32)saddr->s6_addr32[0];
117 b = (__force u32)saddr->s6_addr32[1];
118 c = (__force u32)saddr->s6_addr32[2];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
120 a += JHASH_GOLDEN_RATIO;
121 b += JHASH_GOLDEN_RATIO;
Pavel Emelyanov7eb95152007-10-15 02:31:52 -0700122 c += ip6_frags.rnd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123 __jhash_mix(a, b, c);
124
Al Viroe69a4ad2006-11-14 20:56:00 -0800125 a += (__force u32)saddr->s6_addr32[3];
126 b += (__force u32)daddr->s6_addr32[0];
127 c += (__force u32)daddr->s6_addr32[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 __jhash_mix(a, b, c);
129
Al Viroe69a4ad2006-11-14 20:56:00 -0800130 a += (__force u32)daddr->s6_addr32[2];
131 b += (__force u32)daddr->s6_addr32[3];
132 c += (__force u32)id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 __jhash_mix(a, b, c);
134
Pavel Emelyanov7eb95152007-10-15 02:31:52 -0700135 return c & (INETFRAGS_HASHSZ - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136}
137
Pavel Emelyanov321a3a92007-10-15 02:38:08 -0700138static unsigned int ip6_hashfn(struct inet_frag_queue *q)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139{
Pavel Emelyanov321a3a92007-10-15 02:38:08 -0700140 struct frag_queue *fq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141
Pavel Emelyanov321a3a92007-10-15 02:38:08 -0700142 fq = container_of(q, struct frag_queue, q);
143 return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144}
145
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146/* Memory Tracking Functions. */
147static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
148{
149 if (work)
150 *work -= skb->truesize;
Pavel Emelyanov7eb95152007-10-15 02:31:52 -0700151 atomic_sub(skb->truesize, &ip6_frags.mem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 kfree_skb(skb);
153}
154
/* inet_frags destructor callback: release the queue's own storage. */
static void ip6_frag_free(struct inet_frag_queue *fq)
{
	kfree(container_of(fq, struct frag_queue, q));
}
159
160static inline struct frag_queue *frag_alloc_queue(void)
161{
Ingo Oeser78c784c2006-03-20 23:01:17 -0800162 struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
164 if(!fq)
165 return NULL;
Pavel Emelyanov7eb95152007-10-15 02:31:52 -0700166 atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return fq;
168}
169
170/* Destruction primitives. */
171
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172static __inline__ void fq_put(struct frag_queue *fq, int *work)
173{
Pavel Emelyanov5ab11c92007-10-15 02:24:19 -0700174 if (atomic_dec_and_test(&fq->q.refcnt))
Pavel Emelyanov1e4b8282007-10-15 02:39:14 -0700175 inet_frag_destroy(&fq->q, &ip6_frags, work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176}
177
/* Kill fq entry. It is not destroyed immediately,
 * because caller (and someone more) holds reference count.
 * NOTE(review): inet_frag_kill() presumably unhashes the queue and
 * releases the timer's reference — confirm in net/ipv4/inet_fragment.c.
 */
static __inline__ void fq_kill(struct frag_queue *fq)
{
	inet_frag_kill(&fq->q, &ip6_frags);
}
185
/* Reclaim reassembly memory down to low_thresh by killing the oldest
 * (LRU head) queues.  Each fq_put() call subtracts the memory it
 * released from 'work', so the loop terminates once enough was freed
 * or the LRU list is empty. */
static void ip6_evictor(struct inet6_dev *idev)
{
	struct frag_queue *fq;
	struct list_head *tmp;
	int work;

	work = atomic_read(&ip6_frags.mem) - ip6_frags_ctl.low_thresh;
	if (work <= 0)
		return;

	while(work > 0) {
		/* Take a reference on the LRU head under the read lock,
		 * then drop the lock before sleeping-free operations. */
		read_lock(&ip6_frags.lock);
		if (list_empty(&ip6_frags.lru_list)) {
			read_unlock(&ip6_frags.lock);
			return;
		}
		tmp = ip6_frags.lru_list.next;
		fq = list_entry(tmp, struct frag_queue, q.lru_list);
		atomic_inc(&fq->q.refcnt);
		read_unlock(&ip6_frags.lock);

		/* Kill it unless someone already completed/killed it. */
		spin_lock(&fq->q.lock);
		if (!(fq->q.last_in&COMPLETE))
			fq_kill(fq);
		spin_unlock(&fq->q.lock);

		fq_put(fq, &work);
		IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
	}
}
216
/* Timer callback: the reassembly timeout for one queue elapsed.
 * Kills the queue, bumps the timeout statistics and — per RFC
 * requirements — sends an ICMPv6 "fragment reassembly time exceeded"
 * error, but only if the first fragment was actually received. */
static void ip6_frag_expire(unsigned long data)
{
	struct frag_queue *fq = (struct frag_queue *) data;
	struct net_device *dev = NULL;

	spin_lock(&fq->q.lock);

	/* Already reassembled or killed: nothing to do. */
	if (fq->q.last_in & COMPLETE)
		goto out;

	fq_kill(fq);

	dev = dev_get_by_index(&init_net, fq->iif);
	if (!dev)
		goto out;

	rcu_read_lock();
	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
	rcu_read_unlock();

	/* Don't send error if the first segment did not arrive. */
	if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
		goto out;

	/*
	   But use as source device on which LAST ARRIVED
	   segment was received. And do not use fq->dev
	   pointer directly, device might already disappeared.
	 */
	fq->q.fragments->dev = dev;
	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
out:
	if (dev)
		dev_put(dev);
	spin_unlock(&fq->q.lock);
	fq_put(fq, NULL);
}
255
256/* Creation primitives. */
257
258
/* Insert a freshly created queue into the hash table.  On SMP another
 * CPU may have created an identical queue while we were allocating;
 * in that case the existing queue wins, and the newcomer is marked
 * COMPLETE (so its timer callback does nothing) and released. */
static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
{
	struct frag_queue *fq;
	unsigned int hash;
#ifdef CONFIG_SMP
	struct hlist_node *n;
#endif

	write_lock(&ip6_frags.lock);
	/* Hash must be computed under the lock: it depends on ip6_frags.rnd. */
	hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
#ifdef CONFIG_SMP
	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
		if (fq->id == fq_in->id &&
		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
			/* Lost the race: return the established queue. */
			atomic_inc(&fq->q.refcnt);
			write_unlock(&ip6_frags.lock);
			fq_in->q.last_in |= COMPLETE;
			fq_put(fq_in, NULL);
			return fq;
		}
	}
#endif
	fq = fq_in;

	/* One reference for the pending timer... */
	if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
		atomic_inc(&fq->q.refcnt);

	/* ...and one for the hash table. */
	atomic_inc(&fq->q.refcnt);
	hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
	INIT_LIST_HEAD(&fq->q.lru_list);
	list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
	ip6_frags.nqueues++;
	write_unlock(&ip6_frags.lock);
	return fq;
}
295
296
/* Allocate and initialise a new fragment queue for (id, src, dst),
 * then intern it in the hash table.  Returns NULL (and counts a
 * reassembly failure against 'idev') on allocation failure. */
static struct frag_queue *
ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
		struct inet6_dev *idev)
{
	struct frag_queue *fq;

	if ((fq = frag_alloc_queue()) == NULL)
		goto oom;

	fq->id = id;
	ipv6_addr_copy(&fq->saddr, src);
	ipv6_addr_copy(&fq->daddr, dst);

	/* Expiry timer; armed by ip6_frag_intern(). */
	init_timer(&fq->q.timer);
	fq->q.timer.function = ip6_frag_expire;
	fq->q.timer.data = (long) fq;
	spin_lock_init(&fq->q.lock);
	atomic_set(&fq->q.refcnt, 1);

	return ip6_frag_intern(fq);

oom:
	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
	return NULL;
}
322
/* Look up the fragment queue for (id, src, dst), taking a reference;
 * create a new one if none exists.  Returns NULL only on OOM. */
static __inline__ struct frag_queue *
fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
	struct inet6_dev *idev)
{
	struct frag_queue *fq;
	struct hlist_node *n;
	unsigned int hash;

	read_lock(&ip6_frags.lock);
	/* Hash under the lock: it depends on ip6_frags.rnd. */
	hash = ip6qhashfn(id, src, dst);
	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
		if (fq->id == id &&
		    ipv6_addr_equal(src, &fq->saddr) &&
		    ipv6_addr_equal(dst, &fq->daddr)) {
			atomic_inc(&fq->q.refcnt);
			read_unlock(&ip6_frags.lock);
			return fq;
		}
	}
	read_unlock(&ip6_frags.lock);

	return ip6_frag_create(id, src, dst, idev);
}
346
347
/* Queue one fragment skb on 'fq' (caller holds fq->q.lock).
 *
 * Validates the fragment per RFC 2460, trims any overlap with already
 * queued fragments, links the skb into the offset-sorted fragment list
 * and, when the datagram is complete, hands off to ip6_frag_reasm().
 * Returns ip6_frag_reasm()'s result on completion, -1 otherwise
 * (including all error paths, which also free the skb). */
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
			   struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int offset, end;

	/* Queue already reassembled/killed: drop the straggler. */
	if (fq->q.last_in & COMPLETE)
		goto err;

	/* Fragment offset is in 8-byte units; 'end' is the offset just
	 * past this fragment's payload within the reassembled datagram. */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	if ((unsigned int)end > IPV6_MAXPLEN) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
				 IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
				  ((u8 *)&fhdr->frag_off -
				   skb_network_header(skb)));
		return -1;
	}

	/* The fragment header is about to be stripped; remove its bytes
	 * (and everything before it) from a hardware-complete checksum. */
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);
		skb->csum = csum_sub(skb->csum,
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.last_in & LAST_IN) && end != fq->q.len))
			goto err;
		fq->q.last_in |= LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
					 IPSTATS_MIB_INHDRERRORS);
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
					  offsetof(struct ipv6hdr, payload_len));
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & LAST_IN)
				goto err;
			fq->q.len = end;
		}
	}

	/* Zero-length fragment: nothing to queue. */
	if (end == offset)
		goto err;

	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto err;

	if (pskb_trim_rcsum(skb, end - offset))
		goto err;

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	prev = NULL;
	for(next = fq->q.fragments; next != NULL; next = next->next) {
		if (FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
	if (prev) {
		int i = (FRAG6_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			/* Trim the overlapping head off the new fragment;
			 * the bytes we keep make the checksum invalid. */
			offset += i;
			if (end <= offset)
				goto err;
			if (!pskb_pull(skb, i))
				goto err;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Look for overlap with succeeding segments.
	 * If we can merge fragments, do it.
	 */
	while (next && FRAG6_CB(next)->offset < end) {
		int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */

		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			if (!pskb_pull(next, i))
				goto err;
			FRAG6_CB(next)->offset += i;	/* next fragment */
			fq->q.meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			struct sk_buff *free_it = next;

			/* Old fragment is completely overridden with
			 * new one drop it.
			 */
			next = next->next;

			if (prev)
				prev->next = next;
			else
				fq->q.fragments = next;

			fq->q.meat -= free_it->len;
			frag_kfree_skb(free_it, NULL);
		}
	}

	FRAG6_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->q.fragments = skb;

	/* Remember the arrival interface by index only; the device may
	 * be gone by the time the timer fires. */
	dev = skb->dev;
	if (dev) {
		fq->iif = dev->ifindex;
		skb->dev = NULL;
	}
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;
	atomic_add(skb->truesize, &ip6_frags.mem);

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.last_in |= FIRST_IN;
	}

	/* Both ends seen and no holes left: reassemble now. */
	if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
		return ip6_frag_reasm(fq, prev, dev);

	/* Still incomplete: refresh the queue's LRU position. */
	write_lock(&ip6_frags.lock);
	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
	write_unlock(&ip6_frags.lock);
	return -1;

err:
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}
523
/*
 *	Check if this packet is complete.
 *	Returns NULL on failure by any reason, and pointer
 *	to current nexthdr field in reassembled frame.
 *
 *	It is called with locked fq, and caller must check that
 *	queue is eligible for reassembly i.e. it is not COMPLETE,
 *	the last and the first frames arrived and all the bits are here.
 *
 *	(Return value: 1 on success, -1 on failure, matching the
 *	 ->handler convention in ipv6_frag_rcv.)
 */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
			  struct net_device *dev)
{
	struct sk_buff *fp, *head = fq->q.fragments;
	int    payload_len;
	unsigned int nhoff;

	fq_kill(fq);

	/* Make the one we just received the head. */
	if (prev) {
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);

		if (!fp)
			goto out_oom;

		fp->next = head->next;
		prev->next = fp;

		/* Swap identities so the just-received skb becomes the
		 * list head while the old head's clone takes its slot. */
		skb_morph(head, fq->q.fragments);
		head->next = fq->q.fragments->next;

		kfree_skb(fq->q.fragments);
		fq->q.fragments = head;
	}

	BUG_TRAP(head != NULL);
	BUG_TRAP(FRAG6_CB(head)->offset == 0);

	/* Unfragmented part is taken from the first segment. */
	payload_len = ((head->data - skb_network_header(head)) -
		       sizeof(struct ipv6hdr) + fq->q.len -
		       sizeof(struct frag_hdr));
	if (payload_len > IPV6_MAXPLEN)
		goto out_oversize;

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		goto out_oom;

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	if (skb_shinfo(head)->frag_list) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
			goto out_oom;
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;
		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		atomic_add(clone->truesize, &ip6_frags.mem);
	}

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	nhoff = fq->nhoffset;
	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
	memmove(head->head + sizeof(struct frag_hdr), head->head,
		(head->data - head->head) - sizeof(struct frag_hdr));
	head->mac_header += sizeof(struct frag_hdr);
	head->network_header += sizeof(struct frag_hdr);

	/* Link the remaining fragments as the head skb's frag_list and
	 * fold their lengths/checksums into the head. */
	skb_shinfo(head)->frag_list = head->next;
	skb_reset_transport_header(head);
	skb_push(head, head->data - skb_network_header(head));
	atomic_sub(head->truesize, &ip6_frags.mem);

	for (fp=head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
		atomic_sub(fp->truesize, &ip6_frags.mem);
	}

	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->q.stamp;
	ipv6_hdr(head)->payload_len = htons(payload_len);
	IP6CB(head)->nhoff = nhoff;

	/* Yes, and fold redundant checksum back. 8) */
	if (head->ip_summed == CHECKSUM_COMPLETE)
		head->csum = csum_partial(skb_network_header(head),
					  skb_network_header_len(head),
					  head->csum);

	rcu_read_lock();
	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
	rcu_read_unlock();
	fq->q.fragments = NULL;
	return 1;

out_oversize:
	if (net_ratelimit())
		printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
	goto out_fail;
out_oom:
	if (net_ratelimit())
		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
out_fail:
	rcu_read_lock();
	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
	rcu_read_unlock();
	return -1;
}
653
/* Protocol handler for IPPROTO_FRAGMENT.
 *
 * Validates the fragment header, handles the degenerate "atomic"
 * fragment (offset 0 and no MF bit) inline, and otherwise queues the
 * skb on its fragment queue.  Returns 1 when a complete datagram is
 * ready for further input processing, -1 when the skb was consumed. */
static int ipv6_frag_rcv(struct sk_buff **skbp)
{
	struct sk_buff *skb = *skbp;
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
	struct ipv6hdr *hdr = ipv6_hdr(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);

	/* Jumbo payload inhibits frag. header */
	if (hdr->payload_len==0) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
				  skb_network_header_len(skb));
		return -1;
	}
	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
				 sizeof(struct frag_hdr)))) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
				  skb_network_header_len(skb));
		return -1;
	}

	/* Re-read: pskb_may_pull may have reallocated the header. */
	hdr = ipv6_hdr(skb);
	fhdr = (struct frag_hdr *)skb_transport_header(skb);

	if (!(fhdr->frag_off & htons(0xFFF9))) {
		/* It is not a fragmented frame */
		skb->transport_header += sizeof(struct frag_hdr);
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);

		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
		return 1;
	}

	/* Over budget: evict old queues before accepting new state. */
	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
		ip6_evictor(ip6_dst_idev(skb->dst));

	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
			  ip6_dst_idev(skb->dst))) != NULL) {
		int ret;

		spin_lock(&fq->q.lock);

		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);

		spin_unlock(&fq->q.lock);
		fq_put(fq, NULL);
		return ret;
	}

	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}
710
/* Registration record for the IPv6 Fragment extension header. */
static struct inet6_protocol frag_protocol =
{
	.handler	=	ipv6_frag_rcv,
	.flags		=	INET6_PROTO_NOPOLICY,
};
716
/* Boot-time init: register the fragment protocol handler and plug our
 * callbacks/tunables into the generic inet_frags machinery. */
void __init ipv6_frag_init(void)
{
	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");

	ip6_frags.ctl = &ip6_frags_ctl;
	ip6_frags.hashfn = ip6_hashfn;
	ip6_frags.destructor = ip6_frag_free;
	ip6_frags.skb_free = NULL;	/* no per-skb teardown needed for IPv6 */
	ip6_frags.qsize = sizeof(struct frag_queue);
	inet_frags_init(&ip6_frags);
}