1/* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support for various sizes of conntrack structures.
23 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
24 * - restructure nf_conn (introduce nf_conn_help)
25 * - redesign 'features' how they were originally intended
26 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
27 * - add support for L3 protocol module load on demand.
28 *
29 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
30 */
31
32#include <linux/types.h>
33#include <linux/netfilter.h>
34#include <linux/module.h>
35#include <linux/skbuff.h>
36#include <linux/proc_fs.h>
37#include <linux/vmalloc.h>
38#include <linux/stddef.h>
39#include <linux/slab.h>
40#include <linux/random.h>
41#include <linux/jhash.h>
42#include <linux/err.h>
43#include <linux/percpu.h>
44#include <linux/moduleparam.h>
45#include <linux/notifier.h>
46#include <linux/kernel.h>
47#include <linux/netdevice.h>
48#include <linux/socket.h>
49
50/* This rwlock protects the main hash table, protocol/helper/expected
51 registrations, conntrack timers*/
52#define ASSERT_READ_LOCK(x)
53#define ASSERT_WRITE_LOCK(x)
54
55#include <net/netfilter/nf_conntrack.h>
56#include <net/netfilter/nf_conntrack_l3proto.h>
57#include <net/netfilter/nf_conntrack_protocol.h>
58#include <net/netfilter/nf_conntrack_helper.h>
59#include <net/netfilter/nf_conntrack_core.h>
60
61#define NF_CONNTRACK_VERSION "0.5.0"
62
63#if 0
64#define DEBUGP printk
65#else
66#define DEBUGP(format, args...)
67#endif
68
69DEFINE_RWLOCK(nf_conntrack_lock);
70
71/* nf_conntrack_standalone needs this */
72atomic_t nf_conntrack_count = ATOMIC_INIT(0);
73
74void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
75LIST_HEAD(nf_conntrack_expect_list);
76struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
77struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
78static LIST_HEAD(helpers);
79unsigned int nf_conntrack_htable_size __read_mostly = 0;
80int nf_conntrack_max __read_mostly;
81struct list_head *nf_conntrack_hash __read_mostly;
82static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
83struct nf_conn nf_conntrack_untracked;
84unsigned int nf_ct_log_invalid __read_mostly;
85static LIST_HEAD(unconfirmed);
86static int nf_conntrack_vmalloc __read_mostly;
87
88static unsigned int nf_conntrack_next_id;
89static unsigned int nf_conntrack_expect_next_id;
90#ifdef CONFIG_NF_CONNTRACK_EVENTS
91ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
92ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
93
94DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
95
96/* deliver cached events and clear cache entry - must be called with locally
97 * disabled softirqs */
98static inline void
99__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
100{
101 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
102 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
103 && ecache->events)
104 atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
105 ecache->ct);
106
107 ecache->events = 0;
108 nf_ct_put(ecache->ct);
109 ecache->ct = NULL;
110}
111
112/* Deliver all cached events for a particular conntrack. This is called
113 * by code prior to async packet handling for freeing the skb */
114void nf_ct_deliver_cached_events(const struct nf_conn *ct)
115{
116 struct nf_conntrack_ecache *ecache;
117
118 local_bh_disable();
119 ecache = &__get_cpu_var(nf_conntrack_ecache);
120 if (ecache->ct == ct)
121 __nf_ct_deliver_cached_events(ecache);
122 local_bh_enable();
123}
124
125/* Deliver cached events for old pending events, if current conntrack != old */
126void __nf_ct_event_cache_init(struct nf_conn *ct)
127{
128 struct nf_conntrack_ecache *ecache;
129
130 /* take care of delivering potentially old events */
131 ecache = &__get_cpu_var(nf_conntrack_ecache);
132 BUG_ON(ecache->ct == ct);
133 if (ecache->ct)
134 __nf_ct_deliver_cached_events(ecache);
135 /* initialize for this conntrack/packet */
136 ecache->ct = ct;
137 nf_conntrack_get(&ct->ct_general);
138}
139
140/* flush the event cache - touches other CPU's data and must not be called
141 * while packets are still passing through the code */
142static void nf_ct_event_cache_flush(void)
143{
144 struct nf_conntrack_ecache *ecache;
145 int cpu;
146
147 for_each_possible_cpu(cpu) {
148 ecache = &per_cpu(nf_conntrack_ecache, cpu);
149 if (ecache->ct)
150 nf_ct_put(ecache->ct);
151 }
152}
153#else
154static inline void nf_ct_event_cache_flush(void) {}
155#endif /* CONFIG_NF_CONNTRACK_EVENTS */
156
157DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
158EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
159
160/*
161 * This scheme offers various size of "struct nf_conn" dependent on
162 * features(helper, nat, ...)
163 */
164
165#define NF_CT_FEATURES_NAMELEN 256
166static struct {
167 /* name of slab cache. printed in /proc/slabinfo */
168 char *name;
169
170 /* size of slab cache */
171 size_t size;
172
173 /* slab cache pointer */
174 kmem_cache_t *cachep;
175
176 /* allocated slab cache + modules which use this slab cache */
177 int use;
178
179} nf_ct_cache[NF_CT_F_NUM];
180
181/* protect members of nf_ct_cache except of "use" */
182DEFINE_RWLOCK(nf_ct_cache_lock);
183
184/* This avoids calling kmem_cache_create() with same name simultaneously */
185static DEFINE_MUTEX(nf_ct_cache_mutex);
186
187extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
188struct nf_conntrack_protocol *
189__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
190{
191 if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
192 return &nf_conntrack_generic_protocol;
193
194 return nf_ct_protos[l3proto][protocol];
195}
196
197/* this is guaranteed to always return a valid protocol helper, since
198 * it falls back to generic_protocol */
199struct nf_conntrack_protocol *
200nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
201{
202 struct nf_conntrack_protocol *p;
203
204 preempt_disable();
205 p = __nf_ct_proto_find(l3proto, protocol);
206 if (!try_module_get(p->me))
207 p = &nf_conntrack_generic_protocol;
208 preempt_enable();
209
210 return p;
211}
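/* Illustrative usage sketch, not part of this file: a caller that needs the
 * protocol helper pinned would pair the _find_get()/_put() calls, e.g.
 *
 *	struct nf_conntrack_protocol *p;
 *
 *	p = nf_ct_proto_find_get(PF_INET, IPPROTO_TCP);
 *	... use p ...
 *	nf_ct_proto_put(p);
 *
 * Thanks to the generic fallback described above, p never needs a NULL
 * check. */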
212
213void nf_ct_proto_put(struct nf_conntrack_protocol *p)
214{
215 module_put(p->me);
216}
217
218struct nf_conntrack_l3proto *
219nf_ct_l3proto_find_get(u_int16_t l3proto)
220{
221 struct nf_conntrack_l3proto *p;
222
223 preempt_disable();
224 p = __nf_ct_l3proto_find(l3proto);
225 if (!try_module_get(p->me))
226 p = &nf_conntrack_generic_l3proto;
227 preempt_enable();
228
229 return p;
230}
231
232void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
233{
234 module_put(p->me);
235}
236
237int
238nf_ct_l3proto_try_module_get(unsigned short l3proto)
239{
240 int ret;
241 struct nf_conntrack_l3proto *p;
242
243retry: p = nf_ct_l3proto_find_get(l3proto);
244 if (p == &nf_conntrack_generic_l3proto) {
245 ret = request_module("nf_conntrack-%d", l3proto);
246 if (!ret)
247 goto retry;
248
249 return -EPROTOTYPE;
250 }
251
252 return 0;
253}
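/* Assumed calling pattern (sketch only, not code from this file): a frontend
 * that needs the L3 tracker loaded takes the module reference before it
 * starts work and releases it afterwards:
 *
 *	if (nf_ct_l3proto_try_module_get(l3num) < 0)
 *		return -EPROTOTYPE;
 *	... work that relies on the L3 protocol tracker being loaded ...
 *	nf_ct_l3proto_module_put(l3num);
 */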
254
255void nf_ct_l3proto_module_put(unsigned short l3proto)
256{
257 struct nf_conntrack_l3proto *p;
258
259 preempt_disable();
260 p = __nf_ct_l3proto_find(l3proto);
261 preempt_enable();
262
263 module_put(p->me);
264}
265
266static int nf_conntrack_hash_rnd_initted;
267static unsigned int nf_conntrack_hash_rnd;
268
269static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
270 unsigned int size, unsigned int rnd)
271{
272 unsigned int a, b;
273 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
274 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
275 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
276 (tuple->src.u.all << 16) | tuple->dst.u.all);
277
278 return jhash_2words(a, b, rnd) % size;
279}
280
281static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
282{
283 return __hash_conntrack(tuple, nf_conntrack_htable_size,
284 nf_conntrack_hash_rnd);
285}
286
287int nf_conntrack_register_cache(u_int32_t features, const char *name,
288 size_t size)
289{
290 int ret = 0;
291 char *cache_name;
292 kmem_cache_t *cachep;
293
294 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
295 features, name, size);
296
297 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
298 DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
299 features);
300 return -EINVAL;
301 }
302
303 mutex_lock(&nf_ct_cache_mutex);
304
305 write_lock_bh(&nf_ct_cache_lock);
306 /* e.g: multiple helpers are loaded */
307 if (nf_ct_cache[features].use > 0) {
308 DEBUGP("nf_conntrack_register_cache: already registered.\n");
309 if ((!strncmp(nf_ct_cache[features].name, name,
310 NF_CT_FEATURES_NAMELEN))
311 && nf_ct_cache[features].size == size) {
312 DEBUGP("nf_conntrack_register_cache: reusing.\n");
313 nf_ct_cache[features].use++;
314 ret = 0;
315 } else
316 ret = -EBUSY;
317
318 write_unlock_bh(&nf_ct_cache_lock);
319 mutex_unlock(&nf_ct_cache_mutex);
320 return ret;
321 }
322 write_unlock_bh(&nf_ct_cache_lock);
323
324 /*
325 * The memory space for name of slab cache must be alive until
326 * cache is destroyed.
327 */
328 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
329 if (cache_name == NULL) {
330 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
331 ret = -ENOMEM;
332 goto out_up_mutex;
333 }
334
335 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
336 >= NF_CT_FEATURES_NAMELEN) {
337 printk("nf_conntrack_register_cache: name too long\n");
338 ret = -EINVAL;
339 goto out_free_name;
340 }
341
342 cachep = kmem_cache_create(cache_name, size, 0, 0,
343 NULL, NULL);
344 if (!cachep) {
345 printk("nf_conntrack_register_cache: Can't create slab cache "
346 "for the features = 0x%x\n", features);
347 ret = -ENOMEM;
348 goto out_free_name;
349 }
350
351 write_lock_bh(&nf_ct_cache_lock);
352 nf_ct_cache[features].use = 1;
353 nf_ct_cache[features].size = size;
354 nf_ct_cache[features].cachep = cachep;
355 nf_ct_cache[features].name = cache_name;
356 write_unlock_bh(&nf_ct_cache_lock);
357
358 goto out_up_mutex;
359
360out_free_name:
361 kfree(cache_name);
362out_up_mutex:
363 mutex_unlock(&nf_ct_cache_mutex);
364 return ret;
365}
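/* Illustrative sketch of a caller (it mirrors the call made by
 * nf_conntrack_helper_register() further down and is shown here only as an
 * example of the feature/size pairing):
 *
 *	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *					  sizeof(struct nf_conn)
 *					  + sizeof(struct nf_conn_help)
 *					  + __alignof__(struct nf_conn_help));
 *	...
 *	nf_conntrack_unregister_cache(NF_CT_F_HELP);
 */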
366
367/* FIXME: At present, only nf_conntrack_cleanup() can call this function. */
368void nf_conntrack_unregister_cache(u_int32_t features)
369{
370 kmem_cache_t *cachep;
371 char *name;
372
373 /*
374 * This assures that kmem_cache_create() isn't called before destroying
375 * slab cache.
376 */
377 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
378 mutex_lock(&nf_ct_cache_mutex);
379
380 write_lock_bh(&nf_ct_cache_lock);
381 if (--nf_ct_cache[features].use > 0) {
382 write_unlock_bh(&nf_ct_cache_lock);
383 mutex_unlock(&nf_ct_cache_mutex);
384 return;
385 }
386 cachep = nf_ct_cache[features].cachep;
387 name = nf_ct_cache[features].name;
388 nf_ct_cache[features].cachep = NULL;
389 nf_ct_cache[features].name = NULL;
390 nf_ct_cache[features].size = 0;
391 write_unlock_bh(&nf_ct_cache_lock);
392
393 synchronize_net();
394
395 kmem_cache_destroy(cachep);
396 kfree(name);
397
398 mutex_unlock(&nf_ct_cache_mutex);
399}
400
401int
402nf_ct_get_tuple(const struct sk_buff *skb,
403 unsigned int nhoff,
404 unsigned int dataoff,
405 u_int16_t l3num,
406 u_int8_t protonum,
407 struct nf_conntrack_tuple *tuple,
408 const struct nf_conntrack_l3proto *l3proto,
409 const struct nf_conntrack_protocol *protocol)
410{
411 NF_CT_TUPLE_U_BLANK(tuple);
412
413 tuple->src.l3num = l3num;
414 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
415 return 0;
416
417 tuple->dst.protonum = protonum;
418 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
419
420 return protocol->pkt_to_tuple(skb, dataoff, tuple);
421}
422
423int
424nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
425 const struct nf_conntrack_tuple *orig,
426 const struct nf_conntrack_l3proto *l3proto,
427 const struct nf_conntrack_protocol *protocol)
428{
429 NF_CT_TUPLE_U_BLANK(inverse);
430
431 inverse->src.l3num = orig->src.l3num;
432 if (l3proto->invert_tuple(inverse, orig) == 0)
433 return 0;
434
435 inverse->dst.dir = !orig->dst.dir;
436
437 inverse->dst.protonum = orig->dst.protonum;
438 return protocol->invert_tuple(inverse, orig);
439}
440
441/* nf_conntrack_expect helper functions */
442void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
443{
444 struct nf_conn_help *master_help = nfct_help(exp->master);
445
446 NF_CT_ASSERT(master_help);
447 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
448 NF_CT_ASSERT(!timer_pending(&exp->timeout));
449
450 list_del(&exp->list);
451 NF_CT_STAT_INC(expect_delete);
452 master_help->expecting--;
453 nf_conntrack_expect_put(exp);
454}
455
456static void expectation_timed_out(unsigned long ul_expect)
457{
458 struct nf_conntrack_expect *exp = (void *)ul_expect;
459
460 write_lock_bh(&nf_conntrack_lock);
461 nf_ct_unlink_expect(exp);
462 write_unlock_bh(&nf_conntrack_lock);
463 nf_conntrack_expect_put(exp);
464}
465
466struct nf_conntrack_expect *
467__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
468{
469 struct nf_conntrack_expect *i;
470
471 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
472 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
473 atomic_inc(&i->use);
474 return i;
475 }
476 }
477 return NULL;
478}
479
480/* Just find an expectation corresponding to a tuple. */
481struct nf_conntrack_expect *
482nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
483{
484 struct nf_conntrack_expect *i;
485
486 read_lock_bh(&nf_conntrack_lock);
487 i = __nf_conntrack_expect_find(tuple);
488 read_unlock_bh(&nf_conntrack_lock);
489
490 return i;
491}
492
493/* If an expectation for this connection is found, it gets deleted from
494 * global list then returned. */
495static struct nf_conntrack_expect *
496find_expectation(const struct nf_conntrack_tuple *tuple)
497{
498 struct nf_conntrack_expect *i;
499
500 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
501 /* If master is not in hash table yet (ie. packet hasn't left
502 this machine yet), how can other end know about expected?
503 Hence these are not the droids you are looking for (if
504 master ct never got confirmed, we'd hold a reference to it
505 and weird things would happen to future packets). */
506 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
507 && nf_ct_is_confirmed(i->master)) {
508 if (i->flags & NF_CT_EXPECT_PERMANENT) {
509 atomic_inc(&i->use);
510 return i;
511 } else if (del_timer(&i->timeout)) {
512 nf_ct_unlink_expect(i);
513 return i;
514 }
515 }
516 }
517 return NULL;
518}
519
520/* delete all expectations for this conntrack */
521void nf_ct_remove_expectations(struct nf_conn *ct)
522{
523 struct nf_conntrack_expect *i, *tmp;
524 struct nf_conn_help *help = nfct_help(ct);
525
526 /* Optimization: most connections never expect any others. */
527 if (!help || help->expecting == 0)
528 return;
529
530 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
531 if (i->master == ct && del_timer(&i->timeout)) {
532 nf_ct_unlink_expect(i);
533 nf_conntrack_expect_put(i);
534 }
535 }
536}
537
538static void
539clean_from_lists(struct nf_conn *ct)
540{
541 DEBUGP("clean_from_lists(%p)\n", ct);
542 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
543 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
544 list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
545
546 /* Destroy all pending expectations */
547 nf_ct_remove_expectations(ct);
548}
549
550static void
551destroy_conntrack(struct nf_conntrack *nfct)
552{
553 struct nf_conn *ct = (struct nf_conn *)nfct;
554 struct nf_conntrack_l3proto *l3proto;
555 struct nf_conntrack_protocol *proto;
556
557 DEBUGP("destroy_conntrack(%p)\n", ct);
558 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
559 NF_CT_ASSERT(!timer_pending(&ct->timeout));
560
561 nf_conntrack_event(IPCT_DESTROY, ct);
562 set_bit(IPS_DYING_BIT, &ct->status);
563
564 /* To make sure we don't get any weird locking issues here:
565 * destroy_conntrack() MUST NOT be called with a write lock
566 * to nf_conntrack_lock!!! -HW */
567 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
568 if (l3proto && l3proto->destroy)
569 l3proto->destroy(ct);
570
571 proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
572 if (proto && proto->destroy)
573 proto->destroy(ct);
574
575 if (nf_conntrack_destroyed)
576 nf_conntrack_destroyed(ct);
577
578 write_lock_bh(&nf_conntrack_lock);
579 /* Expectations will have been removed in clean_from_lists,
580 * except TFTP can create an expectation on the first packet,
581 * before connection is in the list, so we need to clean here,
582 * too. */
583 nf_ct_remove_expectations(ct);
584
585 /* We overload first tuple to link into unconfirmed list. */
586 if (!nf_ct_is_confirmed(ct)) {
587 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
588 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
589 }
590
591 NF_CT_STAT_INC(delete);
592 write_unlock_bh(&nf_conntrack_lock);
593
594 if (ct->master)
595 nf_ct_put(ct->master);
596
597 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
598 nf_conntrack_free(ct);
599}
600
601static void death_by_timeout(unsigned long ul_conntrack)
602{
603 struct nf_conn *ct = (void *)ul_conntrack;
604
605 write_lock_bh(&nf_conntrack_lock);
606 /* Inside lock so preempt is disabled on module removal path.
607 * Otherwise we can get spurious warnings. */
608 NF_CT_STAT_INC(delete_list);
609 clean_from_lists(ct);
610 write_unlock_bh(&nf_conntrack_lock);
611 nf_ct_put(ct);
612}
613
614struct nf_conntrack_tuple_hash *
615__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
616 const struct nf_conn *ignored_conntrack)
617{
618 struct nf_conntrack_tuple_hash *h;
619 unsigned int hash = hash_conntrack(tuple);
620
621 ASSERT_READ_LOCK(&nf_conntrack_lock);
622 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
623 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
624 nf_ct_tuple_equal(tuple, &h->tuple)) {
625 NF_CT_STAT_INC(found);
626 return h;
627 }
628 NF_CT_STAT_INC(searched);
629 }
630
631 return NULL;
632}
633
634/* Find a connection corresponding to a tuple. */
635struct nf_conntrack_tuple_hash *
636nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
637 const struct nf_conn *ignored_conntrack)
638{
639 struct nf_conntrack_tuple_hash *h;
640
641 read_lock_bh(&nf_conntrack_lock);
642 h = __nf_conntrack_find(tuple, ignored_conntrack);
643 if (h)
644 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
645 read_unlock_bh(&nf_conntrack_lock);
646
647 return h;
648}
649
650static void __nf_conntrack_hash_insert(struct nf_conn *ct,
651 unsigned int hash,
652 unsigned int repl_hash)
653{
654 ct->id = ++nf_conntrack_next_id;
655 list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
656 &nf_conntrack_hash[hash]);
657 list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
658 &nf_conntrack_hash[repl_hash]);
659}
660
661void nf_conntrack_hash_insert(struct nf_conn *ct)
662{
663 unsigned int hash, repl_hash;
664
665 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
666 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
667
668 write_lock_bh(&nf_conntrack_lock);
669 __nf_conntrack_hash_insert(ct, hash, repl_hash);
670 write_unlock_bh(&nf_conntrack_lock);
671}
672
673/* Confirm a connection given skb; places it in hash table */
674int
675__nf_conntrack_confirm(struct sk_buff **pskb)
676{
677 unsigned int hash, repl_hash;
678 struct nf_conntrack_tuple_hash *h;
679 struct nf_conn *ct;
680 struct nf_conn_help *help;
681 enum ip_conntrack_info ctinfo;
682
683 ct = nf_ct_get(*pskb, &ctinfo);
684
685 /* ipt_REJECT uses nf_conntrack_attach to attach related
686 ICMP/TCP RST packets in other direction. Actual packet
687 which created connection will be IP_CT_NEW or for an
688 expected connection, IP_CT_RELATED. */
689 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
690 return NF_ACCEPT;
691
692 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
693 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
694
695 /* We're not in hash table, and we refuse to set up related
696 connections for unconfirmed conns. But packet copies and
697 REJECT will give spurious warnings here. */
698 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
699
700 /* No external references means no one else could have
701 confirmed us. */
702 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
703 DEBUGP("Confirming conntrack %p\n", ct);
704
705 write_lock_bh(&nf_conntrack_lock);
706
707 /* See if there's one in the list already, including reverse:
708 NAT could have grabbed it without realizing, since we're
709 not in the hash. If there is, we lost race. */
710 list_for_each_entry(h, &nf_conntrack_hash[hash], list)
711 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
712 &h->tuple))
713 goto out;
714 list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
715 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
716 &h->tuple))
717 goto out;
718
719 /* Remove from unconfirmed list */
720 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
721
722 __nf_conntrack_hash_insert(ct, hash, repl_hash);
723 /* Timer relative to confirmation time, not original
724 setting time, otherwise we'd get timer wrap in
725 weird delay cases. */
726 ct->timeout.expires += jiffies;
727 add_timer(&ct->timeout);
728 atomic_inc(&ct->ct_general.use);
729 set_bit(IPS_CONFIRMED_BIT, &ct->status);
730 NF_CT_STAT_INC(insert);
731 write_unlock_bh(&nf_conntrack_lock);
732 help = nfct_help(ct);
733 if (help && help->helper)
734 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
735#ifdef CONFIG_NF_NAT_NEEDED
736 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
737 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
738 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
739#endif
740 nf_conntrack_event_cache(master_ct(ct) ?
741 IPCT_RELATED : IPCT_NEW, *pskb);
742 return NF_ACCEPT;
743
744out:
745 NF_CT_STAT_INC(insert_failed);
746 write_unlock_bh(&nf_conntrack_lock);
747 return NF_DROP;
748}
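/* Sketch of the assumed call site (the real wiring lives in the per-family
 * hook modules, not in this file): the last hook on a packet's path calls
 * the nf_conntrack_confirm() wrapper, which only enters
 * __nf_conntrack_confirm() for not-yet-confirmed entries:
 *
 *	static unsigned int my_confirm_hook(unsigned int hooknum,
 *					    struct sk_buff **pskb, ...)
 *	{
 *		return nf_conntrack_confirm(pskb);
 *	}
 */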
749
750/* Returns true if a connection corresponds to the tuple (required
751 for NAT). */
752int
753nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
754 const struct nf_conn *ignored_conntrack)
755{
756 struct nf_conntrack_tuple_hash *h;
757
758 read_lock_bh(&nf_conntrack_lock);
759 h = __nf_conntrack_find(tuple, ignored_conntrack);
760 read_unlock_bh(&nf_conntrack_lock);
761
762 return h != NULL;
763}
764
765/* There's a small race here where we may free a just-assured
766 connection. Too bad: we're in trouble anyway. */
767static int early_drop(struct list_head *chain)
768{
769 /* Traverse backwards: gives us oldest, which is roughly LRU */
770 struct nf_conntrack_tuple_hash *h;
771 struct nf_conn *ct = NULL, *tmp;
772 int dropped = 0;
773
774 read_lock_bh(&nf_conntrack_lock);
775 list_for_each_entry_reverse(h, chain, list) {
776 tmp = nf_ct_tuplehash_to_ctrack(h);
777 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
778 ct = tmp;
779 atomic_inc(&ct->ct_general.use);
780 break;
781 }
782 }
783 read_unlock_bh(&nf_conntrack_lock);
784
785 if (!ct)
786 return dropped;
787
788 if (del_timer(&ct->timeout)) {
789 death_by_timeout((unsigned long)ct);
790 dropped = 1;
791 NF_CT_STAT_INC(early_drop);
792 }
793 nf_ct_put(ct);
794 return dropped;
795}
796
797static struct nf_conntrack_helper *
798__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
799{
800 struct nf_conntrack_helper *h;
801
802 list_for_each_entry(h, &helpers, list) {
803 if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
804 return h;
805 }
806 return NULL;
807}
808
809struct nf_conntrack_helper *
810nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
811{
812 struct nf_conntrack_helper *helper;
813
814 /* need nf_conntrack_lock to assure that helper exists until
815 * try_module_get() is called */
816 read_lock_bh(&nf_conntrack_lock);
817
818 helper = __nf_ct_helper_find(tuple);
819 if (helper) {
820 /* need to increase module usage count to assure helper will
821 * not go away while the caller is e.g. busy putting a
822 * conntrack in the hash that uses the helper */
823 if (!try_module_get(helper->me))
824 helper = NULL;
825 }
826
827 read_unlock_bh(&nf_conntrack_lock);
828
829 return helper;
830}
831
832void nf_ct_helper_put(struct nf_conntrack_helper *helper)
833{
834 module_put(helper->me);
835}
836
837static struct nf_conn *
838__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
839 const struct nf_conntrack_tuple *repl,
840 const struct nf_conntrack_l3proto *l3proto)
841{
842 struct nf_conn *conntrack = NULL;
843 u_int32_t features = 0;
844 struct nf_conntrack_helper *helper;
845
846 if (unlikely(!nf_conntrack_hash_rnd_initted)) {
847 get_random_bytes(&nf_conntrack_hash_rnd, 4);
848 nf_conntrack_hash_rnd_initted = 1;
849 }
850
851 /* We don't want any race condition at early drop stage */
852 atomic_inc(&nf_conntrack_count);
853
854 if (nf_conntrack_max
855 && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
856 unsigned int hash = hash_conntrack(orig);
857 /* Try dropping from this hash chain. */
858 if (!early_drop(&nf_conntrack_hash[hash])) {
859 atomic_dec(&nf_conntrack_count);
860 if (net_ratelimit())
861 printk(KERN_WARNING
862 "nf_conntrack: table full, dropping"
863 " packet.\n");
864 return ERR_PTR(-ENOMEM);
865 }
866 }
867
868 /* find features needed by this conntrack. */
869 features = l3proto->get_features(orig);
870
871 /* FIXME: protect helper list per RCU */
872 read_lock_bh(&nf_conntrack_lock);
873 helper = __nf_ct_helper_find(repl);
874 if (helper)
875 features |= NF_CT_F_HELP;
876 read_unlock_bh(&nf_conntrack_lock);
877
878 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
879
880 read_lock_bh(&nf_ct_cache_lock);
881
882 if (unlikely(!nf_ct_cache[features].use)) {
883 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
884 features);
885 goto out;
886 }
887
888 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
889 if (conntrack == NULL) {
890 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
891 goto out;
892 }
893
894 memset(conntrack, 0, nf_ct_cache[features].size);
895 conntrack->features = features;
896 if (helper) {
897 struct nf_conn_help *help = nfct_help(conntrack);
898 NF_CT_ASSERT(help);
899 help->helper = helper;
900 }
901
902 atomic_set(&conntrack->ct_general.use, 1);
903 conntrack->ct_general.destroy = destroy_conntrack;
904 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
905 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
906 /* Don't set timer yet: wait for confirmation */
907 init_timer(&conntrack->timeout);
908 conntrack->timeout.data = (unsigned long)conntrack;
909 conntrack->timeout.function = death_by_timeout;
910 read_unlock_bh(&nf_ct_cache_lock);
911
912 return conntrack;
913out:
914 read_unlock_bh(&nf_ct_cache_lock);
915 atomic_dec(&nf_conntrack_count);
916 return conntrack;
917}
918
919struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
920 const struct nf_conntrack_tuple *repl)
921{
922 struct nf_conntrack_l3proto *l3proto;
923
924 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
925 return __nf_conntrack_alloc(orig, repl, l3proto);
926}
927
928void nf_conntrack_free(struct nf_conn *conntrack)
929{
930 u_int32_t features = conntrack->features;
931 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
932 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
933 conntrack);
934 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
935 atomic_dec(&nf_conntrack_count);
936}
937
938/* Allocate a new conntrack: we return -ENOMEM if classification
939 failed due to stress. Otherwise it really is unclassifiable. */
940static struct nf_conntrack_tuple_hash *
941init_conntrack(const struct nf_conntrack_tuple *tuple,
942 struct nf_conntrack_l3proto *l3proto,
943 struct nf_conntrack_protocol *protocol,
944 struct sk_buff *skb,
945 unsigned int dataoff)
946{
947 struct nf_conn *conntrack;
948 struct nf_conntrack_tuple repl_tuple;
949 struct nf_conntrack_expect *exp;
950
951 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
952 DEBUGP("Can't invert tuple.\n");
953 return NULL;
954 }
955
956 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
957 if (conntrack == NULL || IS_ERR(conntrack)) {
958 DEBUGP("Can't allocate conntrack.\n");
959 return (struct nf_conntrack_tuple_hash *)conntrack;
960 }
961
962 if (!protocol->new(conntrack, skb, dataoff)) {
963 nf_conntrack_free(conntrack);
964 DEBUGP("init conntrack: can't track with proto module\n");
965 return NULL;
966 }
967
968 write_lock_bh(&nf_conntrack_lock);
969 exp = find_expectation(tuple);
970
971 if (exp) {
972 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
973 conntrack, exp);
974 /* Welcome, Mr. Bond. We've been expecting you... */
975 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
976 conntrack->master = exp->master;
977#ifdef CONFIG_NF_CONNTRACK_MARK
978 conntrack->mark = exp->master->mark;
979#endif
980#ifdef CONFIG_NF_CONNTRACK_SECMARK
981 conntrack->secmark = exp->master->secmark;
982#endif
983 nf_conntrack_get(&conntrack->master->ct_general);
984 NF_CT_STAT_INC(expect_new);
985 } else
986 NF_CT_STAT_INC(new);
987
988 /* Overload tuple linked list to put us in unconfirmed list. */
989 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
990
991 write_unlock_bh(&nf_conntrack_lock);
992
993 if (exp) {
994 if (exp->expectfn)
995 exp->expectfn(conntrack, exp);
996 nf_conntrack_expect_put(exp);
997 }
998
999 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
1000}
1001
1002/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1003static inline struct nf_conn *
1004resolve_normal_ct(struct sk_buff *skb,
1005 unsigned int dataoff,
1006 u_int16_t l3num,
1007 u_int8_t protonum,
1008 struct nf_conntrack_l3proto *l3proto,
1009 struct nf_conntrack_protocol *proto,
1010 int *set_reply,
1011 enum ip_conntrack_info *ctinfo)
1012{
1013 struct nf_conntrack_tuple tuple;
1014 struct nf_conntrack_tuple_hash *h;
1015 struct nf_conn *ct;
1016
1017 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
1018 dataoff, l3num, protonum, &tuple, l3proto,
1019 proto)) {
1020 DEBUGP("resolve_normal_ct: Can't get tuple\n");
1021 return NULL;
1022 }
1023
1024 /* look for tuple match */
1025 h = nf_conntrack_find_get(&tuple, NULL);
1026 if (!h) {
1027 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
1028 if (!h)
1029 return NULL;
1030 if (IS_ERR(h))
1031 return (void *)h;
1032 }
1033 ct = nf_ct_tuplehash_to_ctrack(h);
1034
1035 /* It exists; we have (non-exclusive) reference. */
1036 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1037 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
1038 /* Please set reply bit if this packet OK */
1039 *set_reply = 1;
1040 } else {
1041 /* Once we've had two way comms, always ESTABLISHED. */
1042 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1043 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
1044 *ctinfo = IP_CT_ESTABLISHED;
1045 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1046 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
1047 *ctinfo = IP_CT_RELATED;
1048 } else {
1049 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
1050 *ctinfo = IP_CT_NEW;
1051 }
1052 *set_reply = 0;
1053 }
1054 skb->nfct = &ct->ct_general;
1055 skb->nfctinfo = *ctinfo;
1056 return ct;
1057}
1058
1059unsigned int
1060nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
1061{
1062 struct nf_conn *ct;
1063 enum ip_conntrack_info ctinfo;
1064 struct nf_conntrack_l3proto *l3proto;
1065 struct nf_conntrack_protocol *proto;
1066 unsigned int dataoff;
1067 u_int8_t protonum;
1068 int set_reply = 0;
1069 int ret;
1070
1071 /* Previously seen (loopback or untracked)? Ignore. */
1072 if ((*pskb)->nfct) {
1073 NF_CT_STAT_INC(ignore);
1074 return NF_ACCEPT;
1075 }
1076
1077 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
1078 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
1079 DEBUGP("not prepared to track yet or error occurred\n");
1080 return -ret;
1081 }
1082
1083 proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
1084
1085 /* It may be a special packet, error, unclean...
1086 * inverse of the return code tells to the netfilter
1087 * core what to do with the packet. */
1088 if (proto->error != NULL &&
1089 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
1090 NF_CT_STAT_INC(error);
1091 NF_CT_STAT_INC(invalid);
1092 return -ret;
1093 }
1094
1095 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
1096 &set_reply, &ctinfo);
1097 if (!ct) {
1098 /* Not valid part of a connection */
1099 NF_CT_STAT_INC(invalid);
1100 return NF_ACCEPT;
1101 }
1102
1103 if (IS_ERR(ct)) {
1104 /* Too stressed to deal. */
1105 NF_CT_STAT_INC(drop);
1106 return NF_DROP;
1107 }
1108
1109 NF_CT_ASSERT((*pskb)->nfct);
1110
1111 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
1112 if (ret < 0) {
1113 /* Invalid: inverse of the return code tells
1114 * the netfilter core what to do */
1115 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
1116 nf_conntrack_put((*pskb)->nfct);
1117 (*pskb)->nfct = NULL;
1118 NF_CT_STAT_INC(invalid);
1119 return -ret;
1120 }
1121
1122 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1123 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
1124
1125 return ret;
1126}
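/* Assumed registration sketch (the actual nf_hook_ops live in the
 * nf_conntrack_l3proto_ipv4/ipv6 modules, not here): a thin wrapper hands
 * packets to nf_conntrack_in() from the netfilter hooks, roughly:
 *
 *	static unsigned int my_in_hook(unsigned int hooknum,
 *				       struct sk_buff **pskb,
 *				       const struct net_device *in,
 *				       const struct net_device *out,
 *				       int (*okfn)(struct sk_buff *))
 *	{
 *		return nf_conntrack_in(PF_INET, hooknum, pskb);
 *	}
 */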
1127
1128int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1129 const struct nf_conntrack_tuple *orig)
1130{
1131 return nf_ct_invert_tuple(inverse, orig,
1132 __nf_ct_l3proto_find(orig->src.l3num),
1133 __nf_ct_proto_find(orig->src.l3num,
1134 orig->dst.protonum));
1135}
1136
1137/* Would two expected things clash? */
1138static inline int expect_clash(const struct nf_conntrack_expect *a,
1139 const struct nf_conntrack_expect *b)
1140{
1141 /* Part covered by intersection of masks must be unequal,
1142 otherwise they clash */
1143 struct nf_conntrack_tuple intersect_mask;
1144 int count;
1145
1146 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1147 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1148 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1149 intersect_mask.dst.protonum = a->mask.dst.protonum
1150 & b->mask.dst.protonum;
1151
1152 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1153 intersect_mask.src.u3.all[count] =
1154 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1155 }
1156
1157 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1158 intersect_mask.dst.u3.all[count] =
1159 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1160 }
1161
1162 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1163}
1164
1165static inline int expect_matches(const struct nf_conntrack_expect *a,
1166 const struct nf_conntrack_expect *b)
1167{
1168 return a->master == b->master
1169 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1170 && nf_ct_tuple_equal(&a->mask, &b->mask);
1171}
1172
1173/* Generally a bad idea to call this: could have matched already. */
1174void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1175{
1176 struct nf_conntrack_expect *i;
1177
1178 write_lock_bh(&nf_conntrack_lock);
1179 /* choose the oldest expectation to evict */
1180 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1181 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1182 nf_ct_unlink_expect(i);
1183 write_unlock_bh(&nf_conntrack_lock);
1184 nf_conntrack_expect_put(i);
1185 return;
1186 }
1187 }
1188 write_unlock_bh(&nf_conntrack_lock);
1189}
1190
1191/* We don't increase the master conntrack refcount for non-fulfilled
1192 * conntracks. During the conntrack destruction, the expectations are
1193 * always killed before the conntrack itself */
1194struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1195{
1196 struct nf_conntrack_expect *new;
1197
1198 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1199 if (!new) {
1200 DEBUGP("expect_related: OOM allocating expect\n");
1201 return NULL;
1202 }
1203 new->master = me;
1204 atomic_set(&new->use, 1);
1205 return new;
1206}
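/* Illustrative helper-side flow (a sketch under the assumption of a typical
 * conntrack helper; the tuple/mask values filled in are up to the helper):
 *
 *	exp = nf_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	exp->tuple = ...; exp->mask = ...;
 *	exp->expectfn = NULL; exp->flags = 0;
 *	if (nf_conntrack_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	nf_conntrack_expect_put(exp);
 */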
1207
1208void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1209{
1210 if (atomic_dec_and_test(&exp->use))
1211 kmem_cache_free(nf_conntrack_expect_cachep, exp);
1212}
1213
1214static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1215{
1216 struct nf_conn_help *master_help = nfct_help(exp->master);
1217
1218 atomic_inc(&exp->use);
1219 master_help->expecting++;
1220 list_add(&exp->list, &nf_conntrack_expect_list);
1221
1222 init_timer(&exp->timeout);
1223 exp->timeout.data = (unsigned long)exp;
1224 exp->timeout.function = expectation_timed_out;
1225 exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
1226 add_timer(&exp->timeout);
1227
1228 exp->id = ++nf_conntrack_expect_next_id;
1229 atomic_inc(&exp->use);
1230 NF_CT_STAT_INC(expect_create);
1231}
1232
1233/* Race with expectations being used means we could have none to find; OK. */
1234static void evict_oldest_expect(struct nf_conn *master)
1235{
1236 struct nf_conntrack_expect *i;
1237
1238 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1239 if (i->master == master) {
1240 if (del_timer(&i->timeout)) {
1241 nf_ct_unlink_expect(i);
1242 nf_conntrack_expect_put(i);
1243 }
1244 break;
1245 }
1246 }
1247}
1248
1249static inline int refresh_timer(struct nf_conntrack_expect *i)
1250{
1251 struct nf_conn_help *master_help = nfct_help(i->master);
1252
1253 if (!del_timer(&i->timeout))
1254 return 0;
1255
1256 i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
1257 add_timer(&i->timeout);
1258 return 1;
1259}
1260
1261int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1262{
1263 struct nf_conntrack_expect *i;
1264 struct nf_conn *master = expect->master;
1265 struct nf_conn_help *master_help = nfct_help(master);
1266 int ret;
1267
1268 NF_CT_ASSERT(master_help);
1269
1270 DEBUGP("nf_conntrack_expect_related %p\n", expect);
1271 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1272 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1273
1274 write_lock_bh(&nf_conntrack_lock);
1275 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1276 if (expect_matches(i, expect)) {
1277 /* Refresh timer: if it's dying, ignore.. */
1278 if (refresh_timer(i)) {
1279 ret = 0;
1280 goto out;
1281 }
1282 } else if (expect_clash(i, expect)) {
1283 ret = -EBUSY;
1284 goto out;
1285 }
1286 }
1287 /* Will be over limit? */
1288 if (master_help->helper->max_expected &&
1289 master_help->expecting >= master_help->helper->max_expected)
1290 evict_oldest_expect(master);
1291
1292 nf_conntrack_expect_insert(expect);
1293 nf_conntrack_expect_event(IPEXP_NEW, expect);
1294 ret = 0;
1295out:
1296 write_unlock_bh(&nf_conntrack_lock);
1297 return ret;
1298}
1299
1300int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1301{
1302 int ret;
1303 BUG_ON(me->timeout == 0);
1304
1305 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1306 sizeof(struct nf_conn)
1307 + sizeof(struct nf_conn_help)
1308 + __alignof__(struct nf_conn_help));
1309 if (ret < 0) {
1310 printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
1311 return ret;
1312 }
1313 write_lock_bh(&nf_conntrack_lock);
1314 list_add(&me->list, &helpers);
1315 write_unlock_bh(&nf_conntrack_lock);
1316
1317 return 0;
1318}
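/* Illustrative module-side sketch: my_helper and my_help_fn are made-up
 * names, and the field list is modelled on the helpers of this tree, so
 * treat it as an assumption rather than a reference:
 *
 *	static struct nf_conntrack_helper my_helper = {
 *		.name		= "my-proto",
 *		.me		= THIS_MODULE,
 *		.max_expected	= 1,
 *		.timeout	= 5 * 60,
 *		.tuple		= { ... },
 *		.mask		= { ... },
 *		.help		= my_help_fn,
 *	};
 *
 *	ret = nf_conntrack_helper_register(&my_helper);
 *	...
 *	nf_conntrack_helper_unregister(&my_helper);
 */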
1319
1320struct nf_conntrack_helper *
1321__nf_conntrack_helper_find_byname(const char *name)
1322{
1323 struct nf_conntrack_helper *h;
1324
1325 list_for_each_entry(h, &helpers, list) {
1326 if (!strcmp(h->name, name))
1327 return h;
1328 }
1329
1330 return NULL;
1331}
1332
1333static inline void unhelp(struct nf_conntrack_tuple_hash *i,
1334 const struct nf_conntrack_helper *me)
1335{
1336 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
1337 struct nf_conn_help *help = nfct_help(ct);
1338
1339 if (help && help->helper == me) {
1340 nf_conntrack_event(IPCT_HELPER, ct);
1341 help->helper = NULL;
1342 }
1343}
1344
1345void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1346{
1347 unsigned int i;
1348 struct nf_conntrack_tuple_hash *h;
1349 struct nf_conntrack_expect *exp, *tmp;
1350
1351 /* Need write lock here, to delete helper. */
1352 write_lock_bh(&nf_conntrack_lock);
1353 list_del(&me->list);
1354
1355 /* Get rid of expectations */
1356 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
1357 struct nf_conn_help *help = nfct_help(exp->master);
1358 if (help->helper == me && del_timer(&exp->timeout)) {
1359 nf_ct_unlink_expect(exp);
1360 nf_conntrack_expect_put(exp);
1361 }
1362 }
1363
1364 /* Get rid of expecteds, set helpers to NULL. */
1365 list_for_each_entry(h, &unconfirmed, list)
1366 unhelp(h, me);
1367 for (i = 0; i < nf_conntrack_htable_size; i++) {
1368 list_for_each_entry(h, &nf_conntrack_hash[i], list)
1369 unhelp(h, me);
1370 }
1371 write_unlock_bh(&nf_conntrack_lock);
1372
1373 /* Someone could be still looking at the helper in a bh. */
1374 synchronize_net();
1375}
1376
1377/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1378void __nf_ct_refresh_acct(struct nf_conn *ct,
1379 enum ip_conntrack_info ctinfo,
1380 const struct sk_buff *skb,
1381 unsigned long extra_jiffies,
1382 int do_acct)
1383{
1384 int event = 0;
1385
1386 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1387 NF_CT_ASSERT(skb);
1388
1389 write_lock_bh(&nf_conntrack_lock);
1390
1391 /* Only update if this is not a fixed timeout */
1392 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
1393 write_unlock_bh(&nf_conntrack_lock);
1394 return;
1395 }
1396
1397 /* If not in hash table, timer will not be active yet */
1398 if (!nf_ct_is_confirmed(ct)) {
1399 ct->timeout.expires = extra_jiffies;
1400 event = IPCT_REFRESH;
1401 } else {
1402 /* Need del_timer for race avoidance (may already be dying). */
1403 if (del_timer(&ct->timeout)) {
1404 ct->timeout.expires = jiffies + extra_jiffies;
1405 add_timer(&ct->timeout);
1406 event = IPCT_REFRESH;
1407 }
1408 }
1409
1410#ifdef CONFIG_NF_CT_ACCT
1411 if (do_acct) {
1412 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1413 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1414 skb->len - (unsigned int)(skb->nh.raw - skb->data);
1415 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1416 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1417 event |= IPCT_COUNTER_FILLING;
1418 }
1419#endif
1420
1421 write_unlock_bh(&nf_conntrack_lock);
1422
1423 /* must be unlocked when calling event cache */
1424 if (event)
1425 nf_conntrack_event_cache(event, skb);
1426}
1427
1428#if defined(CONFIG_NF_CT_NETLINK) || \
1429 defined(CONFIG_NF_CT_NETLINK_MODULE)
1430
1431#include <linux/netfilter/nfnetlink.h>
1432#include <linux/netfilter/nfnetlink_conntrack.h>
1433#include <linux/mutex.h>
1434
1435
1436/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1437 * in ip_conntrack_core, since we don't want the protocols to autoload
1438 * or depend on ctnetlink */
1439int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1440 const struct nf_conntrack_tuple *tuple)
1441{
1442 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
1443 &tuple->src.u.tcp.port);
1444 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
1445 &tuple->dst.u.tcp.port);
1446 return 0;
1447
1448nfattr_failure:
1449 return -1;
1450}
1451
1452static const size_t cta_min_proto[CTA_PROTO_MAX] = {
1453 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
1454 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
1455};
1456
1457int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1458 struct nf_conntrack_tuple *t)
1459{
1460 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1461 return -EINVAL;
1462
1463 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
1464 return -EINVAL;
1465
1466 t->src.u.tcp.port =
1467 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1468 t->dst.u.tcp.port =
1469 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1470
1471 return 0;
1472}
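/* Assumed wiring (sketch only; the real tables live in the per-protocol
 * modules): a port-based protocol simply points its ctnetlink conversion
 * hooks at the two generic functions above, e.g.
 *
 *	struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = {
 *		...
 *		.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
 *		.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
 *	};
 */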
1473#endif
1474
1475/* Used by ipt_REJECT and ip6t_REJECT. */
1476void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1477{
1478 struct nf_conn *ct;
1479 enum ip_conntrack_info ctinfo;
1480
1481 /* This ICMP is in reverse direction to the packet which caused it */
1482 ct = nf_ct_get(skb, &ctinfo);
1483 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1484 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1485 else
1486 ctinfo = IP_CT_RELATED;
1487
1488 /* Attach to new skbuff, and increment count */
1489 nskb->nfct = &ct->ct_general;
1490 nskb->nfctinfo = ctinfo;
1491 nf_conntrack_get(nskb->nfct);
1492}
1493
1494static inline int
1495do_iter(const struct nf_conntrack_tuple_hash *i,
1496 int (*iter)(struct nf_conn *i, void *data),
1497 void *data)
1498{
1499 return iter(nf_ct_tuplehash_to_ctrack(i), data);
1500}
1501
1502/* Bring out ya dead! */
1503static struct nf_conn *
1504get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1505 void *data, unsigned int *bucket)
1506{
1507 struct nf_conntrack_tuple_hash *h;
1508 struct nf_conn *ct;
1509
1510 write_lock_bh(&nf_conntrack_lock);
1511 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1512 list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
1513 ct = nf_ct_tuplehash_to_ctrack(h);
1514 if (iter(ct, data))
1515 goto found;
1516 }
1517 }
1518 list_for_each_entry(h, &unconfirmed, list) {
1519 ct = nf_ct_tuplehash_to_ctrack(h);
1520 if (iter(ct, data))
1521 goto found;
1522 }
1523 write_unlock_bh(&nf_conntrack_lock);
1524 return NULL;
1525found:
1526 atomic_inc(&ct->ct_general.use);
1527 write_unlock_bh(&nf_conntrack_lock);
1528 return ct;
1529}
1530
void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
        struct nf_conn *ct;
        unsigned int bucket = 0;

        while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                nf_ct_put(ct);
        }
}
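
/*
 * Illustrative sketch only (not part of the original file): any predicate
 * over struct nf_conn can be handed to nf_ct_iterate_cleanup().  Dropping
 * every tracked connection of one L4 protocol might look like:
 *
 *	static int kill_l4proto(struct nf_conn *i, void *data)
 *	{
 *		u_int8_t protonum = *(u_int8_t *)data;
 *		return i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
 *			== protonum;
 *	}
 *
 *	u_int8_t proto = IPPROTO_UDP;
 *	nf_ct_iterate_cleanup(kill_l4proto, &proto);
 *
 * get_next_corpse() hands back each matching entry with an extra reference,
 * and the loop above either fires its timeout handler early or leaves that
 * to the already-pending timer.
 */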

static int kill_all(struct nf_conn *i, void *data)
{
        return 1;
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
        if (vmalloced)
                vfree(hash);
        else
                free_pages((unsigned long)hash,
                           get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
        nf_ct_iterate_cleanup(kill_all, NULL);
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
        int i;

        ip_ct_attach = NULL;

        /* This makes sure all current packets have passed through
           the netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

        nf_ct_event_cache_flush();
 i_see_dead_people:
        nf_conntrack_flush();
        if (atomic_read(&nf_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }
        /* wait until all references to nf_conntrack_untracked are dropped */
        while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
                schedule();

        for (i = 0; i < NF_CT_F_NUM; i++) {
                if (nf_ct_cache[i].use == 0)
                        continue;

                NF_CT_ASSERT(nf_ct_cache[i].use == 1);
                nf_ct_cache[i].use = 1;
                nf_conntrack_unregister_cache(i);
        }
        kmem_cache_destroy(nf_conntrack_expect_cachep);
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);

        /* free l3proto protocol tables */
        for (i = 0; i < PF_MAX; i++)
                if (nf_ct_protos[i]) {
                        kfree(nf_ct_protos[i]);
                        nf_ct_protos[i] = NULL;
                }
}

static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
        struct list_head *hash;
        unsigned int i;

        *vmalloced = 0;
        hash = (void *)__get_free_pages(GFP_KERNEL,
                                        get_order(sizeof(struct list_head)
                                                  * size));
        if (!hash) {
                *vmalloced = 1;
                printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                hash = vmalloc(sizeof(struct list_head) * size);
        }

        if (hash)
                for (i = 0; i < size; i++)
                        INIT_LIST_HEAD(&hash[i]);

        return hash;
}

int set_hashsize(const char *val, struct kernel_param *kp)
{
        int i, bucket, hashsize, vmalloced;
        int old_vmalloced, old_size;
        int rnd;
        struct list_head *hash, *old_hash;
        struct nf_conntrack_tuple_hash *h;

        /* On boot, we can set this without any fancy locking. */
        if (!nf_conntrack_htable_size)
                return param_set_uint(val, kp);

        hashsize = simple_strtol(val, NULL, 0);
        if (!hashsize)
                return -EINVAL;

        hash = alloc_hashtable(hashsize, &vmalloced);
        if (!hash)
                return -ENOMEM;

        /* We have to rehash for the new table anyway, so we can also
         * use a new random seed */
        get_random_bytes(&rnd, 4);

        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < nf_conntrack_htable_size; i++) {
                while (!list_empty(&nf_conntrack_hash[i])) {
                        h = list_entry(nf_conntrack_hash[i].next,
                                       struct nf_conntrack_tuple_hash, list);
                        list_del(&h->list);
                        bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
                        list_add_tail(&h->list, &hash[bucket]);
                }
        }
        old_size = nf_conntrack_htable_size;
        old_vmalloced = nf_conntrack_vmalloc;
        old_hash = nf_conntrack_hash;

        nf_conntrack_htable_size = hashsize;
        nf_conntrack_vmalloc = vmalloced;
        nf_conntrack_hash = hash;
        nf_conntrack_hash_rnd = rnd;
        write_unlock_bh(&nf_conntrack_lock);

        free_conntrack_hash(old_hash, old_vmalloced, old_size);
        return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
                  &nf_conntrack_htable_size, 0600);
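
/*
 * Usage sketch (an assumption about the surrounding setup, not something
 * this file defines): with the core built as the nf_conntrack module and
 * sysfs mounted in the usual place, the hash size can be set at load time,
 *
 *	modprobe nf_conntrack hashsize=16384
 *
 * which is handled by param_set_uint() before the table exists, or at
 * runtime by root (hence the 0600 mode above),
 *
 *	echo 32768 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * which goes through set_hashsize() and rehashes all live entries under
 * nf_conntrack_lock.
 */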

int __init nf_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
         * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
        if (!nf_conntrack_htable_size) {
                nf_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        nf_conntrack_htable_size = 8192;
                if (nf_conntrack_htable_size < 16)
                        nf_conntrack_htable_size = 16;
        }
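        /* Worked example of the rule above (illustrative numbers only):
         * a 32MB i386 box has num_physpages = 8192 with PAGE_SHIFT = 12,
         * so (8192 << 12) / 16384 = 2048 bytes of table, which with an
         * 8-byte struct list_head is 256 buckets.  Boxes with 1GB or more
         * of RAM are clamped to 8192 buckets; tiny ones get at least 16. */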
        nf_conntrack_max = 8 * nf_conntrack_htable_size;

        printk("nf_conntrack version %s (%u buckets, %d max)\n",
               NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
               nf_conntrack_max);

        nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
                                            &nf_conntrack_vmalloc);
        if (!nf_conntrack_hash) {
                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
                goto err_out;
        }

        ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
                                          sizeof(struct nf_conn));
        if (ret < 0) {
                printk(KERN_ERR "Unable to create nf_conn slab cache\n");
                goto err_free_hash;
        }

        nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!nf_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create nf_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED lock here, but good form anyway. */
        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < PF_MAX; i++)
                nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
        write_unlock_bh(&nf_conntrack_lock);

        /* For use by REJECT target */
        ip_ct_attach = __nf_conntrack_attach;

        /* Set up fake conntrack:
           - to never be deleted, not in any hashes */
        atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
        /* - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);
err_out:
        return -ENOMEM;
}