/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- generalize L3 protocol dependent part.
 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- add support for various sizes of conntrack structures.
 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
 *	- restructure nf_conn (introduce nf_conn_help)
 *	- redesign 'features' as they were originally intended
 *
 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, and conntrack timers. */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DEFINE_RWLOCK(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);

void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
LIST_HEAD(nf_conntrack_expect_list);
struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
static LIST_HEAD(helpers);
unsigned int nf_conntrack_htable_size = 0;
int nf_conntrack_max;
struct list_head *nf_conntrack_hash;
static kmem_cache_t *nf_conntrack_expect_cachep;
struct nf_conn nf_conntrack_untracked;
unsigned int nf_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc;

static unsigned int nf_conntrack_next_id = 1;
static unsigned int nf_conntrack_expect_next_id = 1;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct notifier_block *nf_conntrack_chain;
struct notifier_block *nf_conntrack_expect_chain;

DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
        DEBUGP("ecache: delivering events for %p\n", ecache->ct);
        if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
            && ecache->events)
                notifier_call_chain(&nf_conntrack_chain, ecache->events,
                                    ecache->ct);

        ecache->events = 0;
        nf_ct_put(ecache->ct);
        ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling for freeing the skb */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
        struct nf_conntrack_ecache *ecache;

        local_bh_disable();
        ecache = &__get_cpu_var(nf_conntrack_ecache);
        if (ecache->ct == ct)
                __nf_ct_deliver_cached_events(ecache);
        local_bh_enable();
}

/* Deliver cached events for old pending events, if current conntrack != old */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
        struct nf_conntrack_ecache *ecache;

        /* take care of delivering potentially old events */
        ecache = &__get_cpu_var(nf_conntrack_ecache);
        BUG_ON(ecache->ct == ct);
        if (ecache->ct)
                __nf_ct_deliver_cached_events(ecache);
        /* initialize for this conntrack/packet */
        ecache->ct = ct;
        nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPUs' data and must not be called
 * while packets are still passing through the code */
static void nf_ct_event_cache_flush(void)
{
        struct nf_conntrack_ecache *ecache;
        int cpu;

        for_each_cpu(cpu) {
                ecache = &per_cpu(nf_conntrack_ecache, cpu);
                if (ecache->ct)
                        nf_ct_put(ecache->ct);
        }
}
#else
static inline void nf_ct_event_cache_flush(void) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */

157DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
158EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
159
160/*
161 * This scheme offers various size of "struct nf_conn" dependent on
162 * features(helper, nat, ...)
163 */
164
165#define NF_CT_FEATURES_NAMELEN 256
166static struct {
167 /* name of slab cache. printed in /proc/slabinfo */
168 char *name;
169
170 /* size of slab cache */
171 size_t size;
172
173 /* slab cache pointer */
174 kmem_cache_t *cachep;
175
176 /* allocated slab cache + modules which uses this slab cache */
177 int use;
178
179 /* Initialization */
180 int (*init_conntrack)(struct nf_conn *, u_int32_t);
181
182} nf_ct_cache[NF_CT_F_NUM];
183
184/* protect members of nf_ct_cache except of "use" */
185DEFINE_RWLOCK(nf_ct_cache_lock);
186
187/* This avoids calling kmem_cache_create() with same name simultaneously */
188DECLARE_MUTEX(nf_ct_cache_mutex);
189
extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
struct nf_conntrack_protocol *
__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
{
        if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
                return &nf_conntrack_generic_protocol;

        return nf_ct_protos[l3proto][protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct nf_conntrack_protocol *
nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
{
        struct nf_conntrack_protocol *p;

        preempt_disable();
        p = __nf_ct_proto_find(l3proto, protocol);
        if (p) {
                if (!try_module_get(p->me))
                        p = &nf_conntrack_generic_protocol;
        }
        preempt_enable();

        return p;
}

void nf_ct_proto_put(struct nf_conntrack_protocol *p)
{
        module_put(p->me);
}

struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
        struct nf_conntrack_l3proto *p;

        preempt_disable();
        p = __nf_ct_l3proto_find(l3proto);
        if (p) {
                if (!try_module_get(p->me))
                        p = &nf_conntrack_generic_l3proto;
        }
        preempt_enable();

        return p;
}

void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
{
        module_put(p->me);
}

static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
                                  unsigned int size, unsigned int rnd)
{
        unsigned int a, b;
        a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
                  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
        b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
                  (tuple->src.u.all << 16) | tuple->dst.u.all);

        return jhash_2words(a, b, rnd) % size;
}

static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
        return __hash_conntrack(tuple, nf_conntrack_htable_size,
                                nf_conntrack_hash_rnd);
}

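/*
 * Illustrative sketch (not compiled here): a conntrack is linked into the
 * table once per direction, so code that needs its buckets computes both,
 * exactly as clean_from_lists() does further down:
 *
 *	unsigned int ho, hr;
 *
 *	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 *	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 *
 * The random seed nf_conntrack_hash_rnd keeps the bucket choice
 * unpredictable to remote senders.
 */
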
int nf_conntrack_register_cache(u_int32_t features, const char *name,
                                size_t size)
{
        int ret = 0;
        char *cache_name;
        kmem_cache_t *cachep;

        DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
               features, name, size);

        if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
                DEBUGP("nf_conntrack_register_cache: invalid features: 0x%x\n",
                       features);
                return -EINVAL;
        }

        down(&nf_ct_cache_mutex);

        write_lock_bh(&nf_ct_cache_lock);
        /* e.g.: multiple helpers are loaded */
        if (nf_ct_cache[features].use > 0) {
                DEBUGP("nf_conntrack_register_cache: already registered.\n");
                if ((!strncmp(nf_ct_cache[features].name, name,
                              NF_CT_FEATURES_NAMELEN))
                    && nf_ct_cache[features].size == size) {
                        DEBUGP("nf_conntrack_register_cache: reusing.\n");
                        nf_ct_cache[features].use++;
                        ret = 0;
                } else
                        ret = -EBUSY;

                write_unlock_bh(&nf_ct_cache_lock);
                up(&nf_ct_cache_mutex);
                return ret;
        }
        write_unlock_bh(&nf_ct_cache_lock);

        /*
         * The memory for the slab cache name must stay valid until
         * the cache is destroyed.
         */
        cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
        if (cache_name == NULL) {
                DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
                ret = -ENOMEM;
                goto out_up_mutex;
        }

        if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
                                >= NF_CT_FEATURES_NAMELEN) {
                printk("nf_conntrack_register_cache: name too long\n");
                ret = -EINVAL;
                goto out_free_name;
        }

        cachep = kmem_cache_create(cache_name, size, 0, 0,
                                   NULL, NULL);
        if (!cachep) {
                printk("nf_conntrack_register_cache: Can't create slab cache "
                       "for the features = 0x%x\n", features);
                ret = -ENOMEM;
                goto out_free_name;
        }

        write_lock_bh(&nf_ct_cache_lock);
        nf_ct_cache[features].use = 1;
        nf_ct_cache[features].size = size;
        nf_ct_cache[features].cachep = cachep;
        nf_ct_cache[features].name = cache_name;
        write_unlock_bh(&nf_ct_cache_lock);

        goto out_up_mutex;

out_free_name:
        kfree(cache_name);
out_up_mutex:
        up(&nf_ct_cache_mutex);
        return ret;
}

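/*
 * Usage sketch (illustrative only): a feature provider registers a cache
 * sized for its extension before allocating conntracks from it, and drops
 * the reference again on unload.  nf_conntrack_helper_register() below does
 * exactly this for NF_CT_F_HELP:
 *
 *	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *					  sizeof(struct nf_conn)
 *					  + sizeof(struct nf_conn_help)
 *					  + __alignof__(struct nf_conn_help));
 *	...
 *	nf_conntrack_unregister_cache(NF_CT_F_HELP);
 */
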
/* FIXME: Currently, only nf_conntrack_cleanup() may call this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
        kmem_cache_t *cachep;
        char *name;

        /*
         * This ensures that kmem_cache_create() isn't called with the same
         * name before the old slab cache has been destroyed.
         */
        DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
        down(&nf_ct_cache_mutex);

        write_lock_bh(&nf_ct_cache_lock);
        if (--nf_ct_cache[features].use > 0) {
                write_unlock_bh(&nf_ct_cache_lock);
                up(&nf_ct_cache_mutex);
                return;
        }
        cachep = nf_ct_cache[features].cachep;
        name = nf_ct_cache[features].name;
        nf_ct_cache[features].cachep = NULL;
        nf_ct_cache[features].name = NULL;
        nf_ct_cache[features].size = 0;
        write_unlock_bh(&nf_ct_cache_lock);

        synchronize_net();

        kmem_cache_destroy(cachep);
        kfree(name);

        up(&nf_ct_cache_mutex);
}

int
nf_ct_get_tuple(const struct sk_buff *skb,
                unsigned int nhoff,
                unsigned int dataoff,
                u_int16_t l3num,
                u_int8_t protonum,
                struct nf_conntrack_tuple *tuple,
                const struct nf_conntrack_l3proto *l3proto,
                const struct nf_conntrack_protocol *protocol)
{
        NF_CT_TUPLE_U_BLANK(tuple);

        tuple->src.l3num = l3num;
        if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
                return 0;

        tuple->dst.protonum = protonum;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;

        return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
                   const struct nf_conntrack_tuple *orig,
                   const struct nf_conntrack_l3proto *l3proto,
                   const struct nf_conntrack_protocol *protocol)
{
        NF_CT_TUPLE_U_BLANK(inverse);

        inverse->src.l3num = orig->src.l3num;
        if (l3proto->invert_tuple(inverse, orig) == 0)
                return 0;

        inverse->dst.dir = !orig->dst.dir;

        inverse->dst.protonum = orig->dst.protonum;
        return protocol->invert_tuple(inverse, orig);
}

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);

        NF_CT_ASSERT(master_help);
        ASSERT_WRITE_LOCK(&nf_conntrack_lock);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        list_del(&exp->list);
        NF_CT_STAT_INC(expect_delete);
        master_help->expecting--;
        nf_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        write_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        write_unlock_bh(&nf_conntrack_lock);
        nf_conntrack_expect_put(exp);
}

struct nf_conntrack_expect *
__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        atomic_inc(&i->use);
                        return i;
                }
        }
        return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        read_lock_bh(&nf_conntrack_lock);
        i = __nf_conntrack_expect_find(tuple);
        read_unlock_bh(&nf_conntrack_lock);

        return i;
}

/* If an expectation for this connection is found, it gets deleted from
 * the global list and is then returned. */
static struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
                /* If master is not in hash table yet (ie. packet hasn't left
                   this machine yet), how can other end know about expected?
                   Hence these are not the droids you are looking for (if
                   master ct never got confirmed, we'd hold a reference to it
                   and weird things would happen to future packets). */
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
                    && nf_ct_is_confirmed(i->master)) {
                        if (i->flags & NF_CT_EXPECT_PERMANENT) {
                                atomic_inc(&i->use);
                                return i;
                        } else if (del_timer(&i->timeout)) {
                                nf_ct_unlink_expect(i);
                                return i;
                        }
                }
        }
        return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conntrack_expect *i, *tmp;
        struct nf_conn_help *help = nfct_help(ct);

        /* Optimization: most connections never expect any others. */
        if (!help || help->expecting == 0)
                return;

        list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
                if (i->master == ct && del_timer(&i->timeout)) {
                        nf_ct_unlink_expect(i);
                        nf_conntrack_expect_put(i);
                }
        }
}

static void
clean_from_lists(struct nf_conn *ct)
{
        unsigned int ho, hr;

        DEBUGP("clean_from_lists(%p)\n", ct);
        ASSERT_WRITE_LOCK(&nf_conntrack_lock);

        ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* Destroy all pending expectations */
        nf_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct nf_conn *ct = (struct nf_conn *)nfct;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_protocol *proto;

        DEBUGP("destroy_conntrack(%p)\n", ct);
        NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
        NF_CT_ASSERT(!timer_pending(&ct->timeout));

        nf_conntrack_event(IPCT_DESTROY, ct);
        set_bit(IPS_DYING_BIT, &ct->status);

        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to nf_conntrack_lock!!! -HW */
        l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
        if (l3proto && l3proto->destroy)
                l3proto->destroy(ct);

        proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
                                   ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);

        if (nf_conntrack_destroyed)
                nf_conntrack_destroyed(ct);

        write_lock_bh(&nf_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
         * except TFTP can create an expectation on the first packet,
         * before connection is in the list, so we need to clean here,
         * too. */
        nf_ct_remove_expectations(ct);

        /* We overload first tuple to link into unconfirmed list. */
        if (!nf_ct_is_confirmed(ct)) {
                BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        }

        NF_CT_STAT_INC(delete);
        write_unlock_bh(&nf_conntrack_lock);

        if (ct->master)
                nf_ct_put(ct->master);

        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        nf_conntrack_free(ct);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
        struct nf_conn *ct = (void *)ul_conntrack;

        write_lock_bh(&nf_conntrack_lock);
        /* Inside lock so preempt is disabled on module removal path.
         * Otherwise we can get spurious warnings. */
        NF_CT_STAT_INC(delete_list);
        clean_from_lists(ct);
        write_unlock_bh(&nf_conntrack_lock);
        nf_ct_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
                    const struct nf_conntrack_tuple *tuple,
                    const struct nf_conn *ignored_conntrack)
{
        ASSERT_READ_LOCK(&nf_conntrack_lock);
        return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
                && nf_ct_tuple_equal(tuple, &i->tuple);
}

struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
                    const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);

        ASSERT_READ_LOCK(&nf_conntrack_lock);
        list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
                if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
                        NF_CT_STAT_INC(found);
                        return h;
                }
                NF_CT_STAT_INC(searched);
        }

        return NULL;
}

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
                      const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;

        read_lock_bh(&nf_conntrack_lock);
        h = __nf_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
        read_unlock_bh(&nf_conntrack_lock);

        return h;
}

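/*
 * Usage sketch (illustrative only): nf_conntrack_find_get() returns the
 * tuple hash with an extra reference held on the conntrack, so a caller
 * must drop it with nf_ct_put() when done:
 *
 *	struct nf_conntrack_tuple_hash *h;
 *
 *	h = nf_conntrack_find_get(&tuple, NULL);
 *	if (h) {
 *		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 *		... inspect ct ...
 *		nf_ct_put(ct);
 *	}
 */
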
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
                                       unsigned int hash,
                                       unsigned int repl_hash)
{
        ct->id = ++nf_conntrack_next_id;
        list_prepend(&nf_conntrack_hash[hash],
                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        list_prepend(&nf_conntrack_hash[repl_hash],
                     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
        unsigned int hash, repl_hash;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        write_lock_bh(&nf_conntrack_lock);
        __nf_conntrack_hash_insert(ct, hash, repl_hash);
        write_unlock_bh(&nf_conntrack_lock);
}

/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff **pskb)
{
        unsigned int hash, repl_hash;
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;

        ct = nf_ct_get(*pskb, &ctinfo);

        /* ipt_REJECT uses nf_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction. Actual packet
           which created connection will be IP_CT_NEW or for an
           expected connection, IP_CT_RELATED. */
        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
                return NF_ACCEPT;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        /* We're not in hash table, and we refuse to set up related
           connections for unconfirmed conns.  But packet copies and
           REJECT will give spurious warnings here. */
        /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

        /* No external references means no one else could have
           confirmed us. */
        NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
        DEBUGP("Confirming conntrack %p\n", ct);

        write_lock_bh(&nf_conntrack_lock);

        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost the race. */
        if (!LIST_FIND(&nf_conntrack_hash[hash],
                       conntrack_tuple_cmp,
                       struct nf_conntrack_tuple_hash *,
                       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
            && !LIST_FIND(&nf_conntrack_hash[repl_hash],
                          conntrack_tuple_cmp,
                          struct nf_conntrack_tuple_hash *,
                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
                struct nf_conn_help *help;
                /* Remove from unconfirmed list */
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

                __nf_conntrack_hash_insert(ct, hash, repl_hash);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                NF_CT_STAT_INC(insert);
                write_unlock_bh(&nf_conntrack_lock);
                help = nfct_help(ct);
                if (help && help->helper)
                        nf_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_NF_NAT_NEEDED
                if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
                    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
                        nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
                nf_conntrack_event_cache(master_ct(ct) ?
                                         IPCT_RELATED : IPCT_NEW, *pskb);
                return NF_ACCEPT;
        }

        NF_CT_STAT_INC(insert_failed);
        write_unlock_bh(&nf_conntrack_lock);
        return NF_DROP;
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                         const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;

        read_lock_bh(&nf_conntrack_lock);
        h = __nf_conntrack_find(tuple, ignored_conntrack);
        read_unlock_bh(&nf_conntrack_lock);

        return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
{
        return !(test_bit(IPS_ASSURED_BIT,
                          &nf_ct_tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
        /* Traverse backwards: gives us oldest, which is roughly LRU */
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct = NULL;
        int dropped = 0;

        read_lock_bh(&nf_conntrack_lock);
        h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
                atomic_inc(&ct->ct_general.use);
        }
        read_unlock_bh(&nf_conntrack_lock);

        if (!ct)
                return dropped;

        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
                NF_CT_STAT_INC(early_drop);
        }
        nf_ct_put(ct);
        return dropped;
}

static inline int helper_cmp(const struct nf_conntrack_helper *i,
                             const struct nf_conntrack_tuple *rtuple)
{
        return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
        return LIST_FIND(&helpers, helper_cmp,
                         struct nf_conntrack_helper *,
                         tuple);
}

struct nf_conntrack_helper *
nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_helper *helper;

        /* need nf_conntrack_lock to assure that helper exists until
         * try_module_get() is called */
        read_lock_bh(&nf_conntrack_lock);

        helper = __nf_ct_helper_find(tuple);
        if (helper) {
                /* need to increase module usage count to assure helper will
                 * not go away while the caller is e.g. busy putting a
                 * conntrack in the hash that uses the helper */
                if (!try_module_get(helper->me))
                        helper = NULL;
        }

        read_unlock_bh(&nf_conntrack_lock);

        return helper;
}

void nf_ct_helper_put(struct nf_conntrack_helper *helper)
{
        module_put(helper->me);
}

static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
                     const struct nf_conntrack_tuple *repl,
                     const struct nf_conntrack_l3proto *l3proto)
{
        struct nf_conn *conntrack = NULL;
        u_int32_t features = 0;
        struct nf_conntrack_helper *helper;

        if (unlikely(!nf_conntrack_hash_rnd_initted)) {
                get_random_bytes(&nf_conntrack_hash_rnd, 4);
                nf_conntrack_hash_rnd_initted = 1;
        }

        if (nf_conntrack_max
            && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
                unsigned int hash = hash_conntrack(orig);
                /* Try dropping from this hash chain. */
                if (!early_drop(&nf_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "nf_conntrack: table full, dropping"
                                       " packet.\n");
                        return ERR_PTR(-ENOMEM);
                }
        }

        /* find features needed by this conntrack. */
        features = l3proto->get_features(orig);

        /* FIXME: protect helper list per RCU */
        read_lock_bh(&nf_conntrack_lock);
        helper = __nf_ct_helper_find(repl);
        if (helper)
                features |= NF_CT_F_HELP;
        read_unlock_bh(&nf_conntrack_lock);

        DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);

        read_lock_bh(&nf_ct_cache_lock);

        if (unlikely(!nf_ct_cache[features].use)) {
                DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
                       features);
                goto out;
        }

        conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
        if (conntrack == NULL) {
                DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
                goto out;
        }

        memset(conntrack, 0, nf_ct_cache[features].size);
        conntrack->features = features;
        if (helper) {
                struct nf_conn_help *help = nfct_help(conntrack);
                NF_CT_ASSERT(help);
                help->helper = helper;
        }

        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
        /* Don't set timer yet: wait for confirmation */
        init_timer(&conntrack->timeout);
        conntrack->timeout.data = (unsigned long)conntrack;
        conntrack->timeout.function = death_by_timeout;

        atomic_inc(&nf_conntrack_count);
out:
        read_unlock_bh(&nf_ct_cache_lock);
        return conntrack;
}

struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
                                   const struct nf_conntrack_tuple *repl)
{
        struct nf_conntrack_l3proto *l3proto;

        l3proto = __nf_ct_l3proto_find(orig->src.l3num);
        return __nf_conntrack_alloc(orig, repl, l3proto);
}

void nf_conntrack_free(struct nf_conn *conntrack)
{
        u_int32_t features = conntrack->features;
        NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
        DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
               conntrack);
        kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
        atomic_dec(&nf_conntrack_count);
}

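/*
 * Lifecycle sketch (illustrative only): init_conntrack() below is the
 * in-tree caller.  nf_conntrack_free() is only valid for entries that are
 * not referenced by anyone else: either before the new conntrack has been
 * put on any list, or from destroy_conntrack() once the last reference is
 * gone.
 *
 *	struct nf_conn *ct;
 *
 *	ct = nf_conntrack_alloc(&orig, &repl);
 *	if (ct == NULL || IS_ERR(ct))
 *		return NULL;
 *	...
 *	nf_conntrack_free(ct);
 */
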
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
               struct nf_conntrack_l3proto *l3proto,
               struct nf_conntrack_protocol *protocol,
               struct sk_buff *skb,
               unsigned int dataoff)
{
        struct nf_conn *conntrack;
        struct nf_conntrack_tuple repl_tuple;
        struct nf_conntrack_expect *exp;

        if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }

        conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
        if (conntrack == NULL || IS_ERR(conntrack)) {
                DEBUGP("Can't allocate conntrack.\n");
                return (struct nf_conntrack_tuple_hash *)conntrack;
        }

        if (!protocol->new(conntrack, skb, dataoff)) {
                nf_conntrack_free(conntrack);
                DEBUGP("init conntrack: can't track with proto module\n");
                return NULL;
        }

        write_lock_bh(&nf_conntrack_lock);
        exp = find_expectation(tuple);

        if (exp) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                       conntrack, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = exp->master;
#ifdef CONFIG_NF_CONNTRACK_MARK
                conntrack->mark = exp->master->mark;
#endif
                nf_conntrack_get(&conntrack->master->ct_general);
                NF_CT_STAT_INC(expect_new);
        } else
                NF_CT_STAT_INC(new);

        /* Overload tuple linked list to put us in unconfirmed list. */
        list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

        write_unlock_bh(&nf_conntrack_lock);

        if (exp) {
                if (exp->expectfn)
                        exp->expectfn(conntrack, exp);
                nf_conntrack_expect_put(exp);
        }

        return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
                  unsigned int dataoff,
                  u_int16_t l3num,
                  u_int8_t protonum,
                  struct nf_conntrack_l3proto *l3proto,
                  struct nf_conntrack_protocol *proto,
                  int *set_reply,
                  enum ip_conntrack_info *ctinfo)
{
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;

        if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
                             dataoff, l3num, protonum, &tuple, l3proto,
                             proto)) {
                DEBUGP("resolve_normal_ct: Can't get tuple\n");
                return NULL;
        }

        /* look for tuple match */
        h = nf_conntrack_find_get(&tuple, NULL);
        if (!h) {
                h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
                if (!h)
                        return NULL;
                if (IS_ERR(h))
                        return (void *)h;
        }
        ct = nf_ct_tuplehash_to_ctrack(h);

        /* It exists; we have (non-exclusive) reference. */
        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
                *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
                /* Please set reply bit if this packet OK */
                *set_reply = 1;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
                        DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
        }
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
}

unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
{
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_protocol *proto;
        unsigned int dataoff;
        u_int8_t protonum;
        int set_reply = 0;
        int ret;

        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
                NF_CT_STAT_INC(ignore);
                return NF_ACCEPT;
        }

        l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
        if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
                DEBUGP("not prepared to track yet or error occurred\n");
                return -ret;
        }

        proto = __nf_ct_proto_find((u_int16_t)pf, protonum);

        /* It may be a special packet, error, unclean...
         * inverse of the return code tells the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL &&
            (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
                NF_CT_STAT_INC(error);
                NF_CT_STAT_INC(invalid);
                return -ret;
        }

        ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
                               &set_reply, &ctinfo);
        if (!ct) {
                /* Not valid part of a connection */
                NF_CT_STAT_INC(invalid);
                return NF_ACCEPT;
        }

        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
                NF_CT_STAT_INC(drop);
                return NF_DROP;
        }

        NF_CT_ASSERT((*pskb)->nfct);

        ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                DEBUGP("nf_conntrack_in: Can't track with proto module\n");
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
                NF_CT_STAT_INC(invalid);
                return -ret;
        }

        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                nf_conntrack_event_cache(IPCT_STATUS, *pskb);

        return ret;
}

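/*
 * Illustrative sketch (assumption: this mirrors what the per-family glue
 * such as the IPv4/IPv6 l3proto modules does): nf_conntrack_in() is meant
 * to be called from a netfilter hook function, e.g.
 *
 *	static unsigned int my_conntrack_hook(unsigned int hooknum,
 *					      struct sk_buff **pskb,
 *					      const struct net_device *in,
 *					      const struct net_device *out,
 *					      int (*okfn)(struct sk_buff *))
 *	{
 *		return nf_conntrack_in(PF_INET, hooknum, pskb);
 *	}
 */
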
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
                         const struct nf_conntrack_tuple *orig)
{
        return nf_ct_invert_tuple(inverse, orig,
                                  __nf_ct_l3proto_find(orig->src.l3num),
                                  __nf_ct_proto_find(orig->src.l3num,
                                                     orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct nf_conntrack_tuple intersect_mask;
        int count;

        intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
        intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
        intersect_mask.dst.protonum = a->mask.dst.protonum
                                        & b->mask.dst.protonum;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.dst.u3.all[count] =
                        a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
        struct nf_conntrack_expect *i;

        write_lock_bh(&nf_conntrack_lock);
        /* choose the oldest expectation to evict */
        list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
                if (expect_matches(i, exp) && del_timer(&i->timeout)) {
                        nf_ct_unlink_expect(i);
                        write_unlock_bh(&nf_conntrack_lock);
                        nf_conntrack_expect_put(i);
                        return;
                }
        }
        write_unlock_bh(&nf_conntrack_lock);
}

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
        }
        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}

void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                kmem_cache_free(nf_conntrack_expect_cachep, exp);
}

static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);

        atomic_inc(&exp->use);
        master_help->expecting++;
        list_add(&exp->list, &nf_conntrack_expect_list);

        init_timer(&exp->timeout);
        exp->timeout.data = (unsigned long)exp;
        exp->timeout.function = expectation_timed_out;
        exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
        add_timer(&exp->timeout);

        exp->id = ++nf_conntrack_expect_next_id;
        atomic_inc(&exp->use);
        NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
                if (i->master == master) {
                        if (del_timer(&i->timeout)) {
                                nf_ct_unlink_expect(i);
                                nf_conntrack_expect_put(i);
                        }
                        break;
                }
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);

        if (!del_timer(&i->timeout))
                return 0;

        i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
        add_timer(&i->timeout);
        return 1;
}

int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        int ret;

        NF_CT_ASSERT(master_help);

        DEBUGP("nf_conntrack_expect_related %p\n", expect);
        DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
        DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);

        write_lock_bh(&nf_conntrack_lock);
        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore.. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will we be over the limit? */
        if (master_help->helper->max_expected &&
            master_help->expecting >= master_help->helper->max_expected)
                evict_oldest_expect(master);

        nf_conntrack_expect_insert(expect);
        nf_conntrack_expect_event(IPEXP_NEW, expect);
        ret = 0;
out:
        write_unlock_bh(&nf_conntrack_lock);
        return ret;
}

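/*
 * Usage sketch (illustrative; the tuple/mask values depend entirely on the
 * protocol being helped): a helper typically sets up an expectation for the
 * data connection it has spotted in the control stream like this:
 *
 *	struct nf_conntrack_expect *exp;
 *
 *	exp = nf_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	exp->tuple = ...;	(expected reply-direction tuple)
 *	exp->mask = ...;	(which tuple fields must match)
 *	exp->expectfn = NULL;
 *	exp->flags = 0;
 *	ret = nf_conntrack_expect_related(exp);
 *	nf_conntrack_expect_put(exp);
 */
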
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
        int ret;
        BUG_ON(me->timeout == 0);

        ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
                                          sizeof(struct nf_conn)
                                          + sizeof(struct nf_conn_help)
                                          + __alignof__(struct nf_conn_help));
        if (ret < 0) {
                printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
                return ret;
        }
        write_lock_bh(&nf_conntrack_lock);
        list_prepend(&helpers, me);
        write_unlock_bh(&nf_conntrack_lock);

        return 0;
}

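/*
 * Illustrative sketch (all names hypothetical): a helper module fills in a
 * struct nf_conntrack_helper and registers it from its init routine,
 * unregistering it again on module exit:
 *
 *	static struct nf_conntrack_helper my_helper = {
 *		.name		= "my-proto",
 *		.me		= THIS_MODULE,
 *		.max_expected	= 1,
 *		.timeout	= 5 * 60,
 *		.help		= my_help,
 *		(plus .tuple/.mask describing the control connection)
 *	};
 *
 *	ret = nf_conntrack_helper_register(&my_helper);
 *	...
 *	nf_conntrack_helper_unregister(&my_helper);
 */
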
struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
{
        struct nf_conntrack_helper *h;

        list_for_each_entry(h, &helpers, list) {
                if (!strcmp(h->name, name))
                        return h;
        }

        return NULL;
}

static inline int unhelp(struct nf_conntrack_tuple_hash *i,
                         const struct nf_conntrack_helper *me)
{
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
        struct nf_conn_help *help = nfct_help(ct);

        if (help && help->helper == me) {
                nf_conntrack_event(IPCT_HELPER, ct);
                help->helper = NULL;
        }
        return 0;
}

void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
        unsigned int i;
        struct nf_conntrack_expect *exp, *tmp;

        /* Need write lock here, to delete helper. */
        write_lock_bh(&nf_conntrack_lock);
        LIST_DELETE(&helpers, me);

        /* Get rid of expectations */
        list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
                struct nf_conn_help *help = nfct_help(exp->master);
                if (help->helper == me && del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_conntrack_expect_put(exp);
                }
        }

        /* Get rid of expecteds, set helpers to NULL. */
        LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
        for (i = 0; i < nf_conntrack_htable_size; i++)
                LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
                            struct nf_conntrack_tuple_hash *, me);
        write_unlock_bh(&nf_conntrack_lock);

        /* Someone could be still looking at the helper in a bh. */
        synchronize_net();
}

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
                          enum ip_conntrack_info ctinfo,
                          const struct sk_buff *skb,
                          unsigned long extra_jiffies,
                          int do_acct)
{
        int event = 0;

        NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
        NF_CT_ASSERT(skb);

        write_lock_bh(&nf_conntrack_lock);

        /* If not in hash table, timer will not be active yet */
        if (!nf_ct_is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                event = IPCT_REFRESH;
        } else {
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                        event = IPCT_REFRESH;
                }
        }

#ifdef CONFIG_NF_CT_ACCT
        if (do_acct) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
                        skb->len - (unsigned int)(skb->nh.raw - skb->data);
                if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
                    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
                        event |= IPCT_COUNTER_FILLING;
        }
#endif

        write_unlock_bh(&nf_conntrack_lock);

        /* must be unlocked when calling event cache */
        if (event)
                nf_conntrack_event_cache(event, skb);
}

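/*
 * Usage sketch (illustrative only): protocol trackers call this from their
 * packet() handler on every valid packet, normally through the
 * nf_ct_refresh_acct()/nf_ct_refresh() wrappers in nf_conntrack.h, which
 * reduce to something like
 *
 *	__nf_ct_refresh_acct(ct, ctinfo, skb, my_proto_timeout, 1);
 *
 * where my_proto_timeout is a per-protocol timeout in jiffies.
 */
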
#if defined(CONFIG_NF_CT_NETLINK) || \
    defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
 * in nf_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
                               const struct nf_conntrack_tuple *tuple)
{
        NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
                &tuple->src.u.tcp.port);
        NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
                &tuple->dst.u.tcp.port);
        return 0;

nfattr_failure:
        return -1;
}

static const size_t cta_min_proto[CTA_PROTO_MAX] = {
        [CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
        [CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t)
};

int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
                               struct nf_conntrack_tuple *t)
{
        if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
                return -EINVAL;

        if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
                return -EINVAL;

        t->src.u.tcp.port =
                *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
        t->dst.u.tcp.port =
                *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

        return 0;
}
#endif

/* Used by ipt_REJECT and ip6t_REJECT. */
void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;

        /* This ICMP is in reverse direction to the packet which caused it */
        ct = nf_ct_get(skb, &ctinfo);
        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;

        /* Attach to new skbuff, and increment count */
        nskb->nfct = &ct->ct_general;
        nskb->nfctinfo = ctinfo;
        nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
        int (*iter)(struct nf_conn *i, void *data),
        void *data)
{
        return iter(nf_ct_tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct nf_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
                void *data, unsigned int *bucket)
{
        struct nf_conntrack_tuple_hash *h = NULL;

        write_lock_bh(&nf_conntrack_lock);
        for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
                h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
                                struct nf_conntrack_tuple_hash *, iter, data);
                if (h)
                        break;
        }
        if (!h)
                h = LIST_FIND_W(&unconfirmed, do_iter,
                                struct nf_conntrack_tuple_hash *, iter, data);
        if (h)
                atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
        write_unlock_bh(&nf_conntrack_lock);

        return h;
}

void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
        struct nf_conntrack_tuple_hash *h;
        unsigned int bucket = 0;

        while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                nf_ct_put(ct);
        }
}

static int kill_all(struct nf_conn *i, void *data)
{
        return 1;
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
        if (vmalloced)
                vfree(hash);
        else
                free_pages((unsigned long)hash,
                           get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
        nf_ct_iterate_cleanup(kill_all, NULL);
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
        int i;

        ip_ct_attach = NULL;

        /* This makes sure all current packets have passed through
           netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

        nf_ct_event_cache_flush();
 i_see_dead_people:
        nf_conntrack_flush();
        if (atomic_read(&nf_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }
        /* wait until all references to nf_conntrack_untracked are dropped */
        while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
                schedule();

        for (i = 0; i < NF_CT_F_NUM; i++) {
                if (nf_ct_cache[i].use == 0)
                        continue;

                NF_CT_ASSERT(nf_ct_cache[i].use == 1);
                nf_ct_cache[i].use = 1;
                nf_conntrack_unregister_cache(i);
        }
        kmem_cache_destroy(nf_conntrack_expect_cachep);
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);

        /* free l3proto protocol tables */
        for (i = 0; i < PF_MAX; i++)
                if (nf_ct_protos[i]) {
                        kfree(nf_ct_protos[i]);
                        nf_ct_protos[i] = NULL;
                }
}

static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
        struct list_head *hash;
        unsigned int i;

        *vmalloced = 0;
        hash = (void *)__get_free_pages(GFP_KERNEL,
                                        get_order(sizeof(struct list_head)
                                                  * size));
        if (!hash) {
                *vmalloced = 1;
                printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                hash = vmalloc(sizeof(struct list_head) * size);
        }

        if (hash)
                for (i = 0; i < size; i++)
                        INIT_LIST_HEAD(&hash[i]);

        return hash;
}

int set_hashsize(const char *val, struct kernel_param *kp)
{
        int i, bucket, hashsize, vmalloced;
        int old_vmalloced, old_size;
        int rnd;
        struct list_head *hash, *old_hash;
        struct nf_conntrack_tuple_hash *h;

        /* On boot, we can set this without any fancy locking. */
        if (!nf_conntrack_htable_size)
                return param_set_uint(val, kp);

        hashsize = simple_strtol(val, NULL, 0);
        if (!hashsize)
                return -EINVAL;

        hash = alloc_hashtable(hashsize, &vmalloced);
        if (!hash)
                return -ENOMEM;

        /* We have to rehash for the new table anyway, so we can also
         * use a new random seed */
        get_random_bytes(&rnd, 4);

        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < nf_conntrack_htable_size; i++) {
                while (!list_empty(&nf_conntrack_hash[i])) {
                        h = list_entry(nf_conntrack_hash[i].next,
                                       struct nf_conntrack_tuple_hash, list);
                        list_del(&h->list);
                        bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
                        list_add_tail(&h->list, &hash[bucket]);
                }
        }
        old_size = nf_conntrack_htable_size;
        old_vmalloced = nf_conntrack_vmalloc;
        old_hash = nf_conntrack_hash;

        nf_conntrack_htable_size = hashsize;
        nf_conntrack_vmalloc = vmalloced;
        nf_conntrack_hash = hash;
        nf_conntrack_hash_rnd = rnd;
        write_unlock_bh(&nf_conntrack_lock);

        free_conntrack_hash(old_hash, old_vmalloced, old_size);
        return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
                  &nf_conntrack_htable_size, 0600);

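/*
 * Note (illustrative): with the 0600 permission above this shows up as a
 * writable module parameter, so the table size can be set at load time
 * ("modprobe nf_conntrack hashsize=16384", or nf_conntrack.hashsize= on the
 * kernel command line when built in) and, since set_hashsize() can resize a
 * live table, also changed at runtime through
 * /sys/module/nf_conntrack/parameters/hashsize.
 */
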
int __init nf_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
         * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
        if (!nf_conntrack_htable_size) {
                nf_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        nf_conntrack_htable_size = 8192;
                if (nf_conntrack_htable_size < 16)
                        nf_conntrack_htable_size = 16;
        }
        nf_conntrack_max = 8 * nf_conntrack_htable_size;

        printk("nf_conntrack version %s (%u buckets, %d max)\n",
               NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
               nf_conntrack_max);

        nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
                                            &nf_conntrack_vmalloc);
        if (!nf_conntrack_hash) {
                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
                goto err_out;
        }

        ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
                                          sizeof(struct nf_conn));
        if (ret < 0) {
                printk(KERN_ERR "Unable to create nf_conn slab cache\n");
                goto err_free_hash;
        }

        nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!nf_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create nf_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED lock here, but good form anyway. */
        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < PF_MAX; i++)
                nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
        write_unlock_bh(&nf_conntrack_lock);

        /* For use by REJECT target */
        ip_ct_attach = __nf_conntrack_attach;

        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */
        atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
        /*  - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);
err_out:
        return -ENOMEM;
}