Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001/* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
Harald Weltedc808fe2006-03-20 17:56:32 -08006 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08007 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support for various sizes of conntrack structures.
Harald Weltedc808fe2006-03-20 17:56:32 -080023 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
24 * - restructure nf_conn (introduce nf_conn_help)
25 * - redesign 'features' how they were originally intended
Pablo Neira Ayusob9f78f92006-03-22 13:56:08 -080026 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
27 * - add support for L3 protocol module load on demand.
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080028 *
29 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
30 */
31
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080032#include <linux/types.h>
33#include <linux/netfilter.h>
34#include <linux/module.h>
35#include <linux/skbuff.h>
36#include <linux/proc_fs.h>
37#include <linux/vmalloc.h>
38#include <linux/stddef.h>
39#include <linux/slab.h>
40#include <linux/random.h>
41#include <linux/jhash.h>
42#include <linux/err.h>
43#include <linux/percpu.h>
44#include <linux/moduleparam.h>
45#include <linux/notifier.h>
46#include <linux/kernel.h>
47#include <linux/netdevice.h>
48#include <linux/socket.h>
49
50/* This rwlock protects the main hash table, protocol/helper/expected
51 registrations, conntrack timers */
52#define ASSERT_READ_LOCK(x)
53#define ASSERT_WRITE_LOCK(x)
54
55#include <net/netfilter/nf_conntrack.h>
56#include <net/netfilter/nf_conntrack_l3proto.h>
57#include <net/netfilter/nf_conntrack_protocol.h>
58#include <net/netfilter/nf_conntrack_helper.h>
59#include <net/netfilter/nf_conntrack_core.h>
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080060
Harald Weltedc808fe2006-03-20 17:56:32 -080061#define NF_CONNTRACK_VERSION "0.5.0"
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080062
63#if 0
64#define DEBUGP printk
65#else
66#define DEBUGP(format, args...)
67#endif
68
69DEFINE_RWLOCK(nf_conntrack_lock);
70
71/* nf_conntrack_standalone needs this */
72atomic_t nf_conntrack_count = ATOMIC_INIT(0);
73
74void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
75LIST_HEAD(nf_conntrack_expect_list);
76struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
77struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
78static LIST_HEAD(helpers);
Brian Haley94aec082006-09-18 00:05:22 -070079unsigned int nf_conntrack_htable_size __read_mostly = 0;
80int nf_conntrack_max __read_mostly;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080081struct list_head *nf_conntrack_hash;
82static kmem_cache_t *nf_conntrack_expect_cachep;
83struct nf_conn nf_conntrack_untracked;
Brian Haley94aec082006-09-18 00:05:22 -070084unsigned int nf_ct_log_invalid __read_mostly;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080085static LIST_HEAD(unconfirmed);
86static int nf_conntrack_vmalloc;
87
Pablo Neira Ayuso4e3882f2006-03-22 13:55:11 -080088static unsigned int nf_conntrack_next_id;
89static unsigned int nf_conntrack_expect_next_id;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080090#ifdef CONFIG_NF_CONNTRACK_EVENTS
Alan Sterne041c682006-03-27 01:16:30 -080091ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
92ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -080093
94DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
95
96/* deliver cached events and clear cache entry - must be called with locally
97 * disabled softirqs */
98static inline void
99__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
100{
101 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
102 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
103 && ecache->events)
Alan Sterne041c682006-03-27 01:16:30 -0800104 atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800105 ecache->ct);
106
107 ecache->events = 0;
108 nf_ct_put(ecache->ct);
109 ecache->ct = NULL;
110}
111
112/* Deliver all cached events for a particular conntrack. This is called
113 * by code prior to async packet handling for freeing the skb */
114void nf_ct_deliver_cached_events(const struct nf_conn *ct)
115{
116 struct nf_conntrack_ecache *ecache;
117
118 local_bh_disable();
119 ecache = &__get_cpu_var(nf_conntrack_ecache);
120 if (ecache->ct == ct)
121 __nf_ct_deliver_cached_events(ecache);
122 local_bh_enable();
123}
124
125/* Deliver any old pending cached events if the cached conntrack differs from this one */
126void __nf_ct_event_cache_init(struct nf_conn *ct)
127{
128 struct nf_conntrack_ecache *ecache;
129
130 /* take care of delivering potentially old events */
131 ecache = &__get_cpu_var(nf_conntrack_ecache);
132 BUG_ON(ecache->ct == ct);
133 if (ecache->ct)
134 __nf_ct_deliver_cached_events(ecache);
135 /* initialize for this conntrack/packet */
136 ecache->ct = ct;
137 nf_conntrack_get(&ct->ct_general);
138}
139
140/* flush the event cache - touches other CPUs' data and must not be called
141 * while packets are still passing through the code */
142static void nf_ct_event_cache_flush(void)
143{
144 struct nf_conntrack_ecache *ecache;
145 int cpu;
146
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -0700147 for_each_possible_cpu(cpu) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800148 ecache = &per_cpu(nf_conntrack_ecache, cpu);
149 if (ecache->ct)
150 nf_ct_put(ecache->ct);
151 }
152}
153#else
154static inline void nf_ct_event_cache_flush(void) {}
155#endif /* CONFIG_NF_CONNTRACK_EVENTS */
156
157DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
158EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
159
160/*
161 * This scheme offers various sizes of "struct nf_conn" depending on
162 * the features in use (helper, nat, ...)
163 */
164
165#define NF_CT_FEATURES_NAMELEN 256
166static struct {
167 /* name of slab cache. printed in /proc/slabinfo */
168 char *name;
169
170 /* size of slab cache */
171 size_t size;
172
173 /* slab cache pointer */
174 kmem_cache_t *cachep;
175
176 /* allocated slab cache + modules which use this slab cache */
177 int use;
178
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800179} nf_ct_cache[NF_CT_F_NUM];
180
181/* protect members of nf_ct_cache except of "use" */
182DEFINE_RWLOCK(nf_ct_cache_lock);
183
184/* This avoids calling kmem_cache_create() with same name simultaneously */
Ingo Molnar57b47a52006-03-20 22:35:41 -0800185static DEFINE_MUTEX(nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800186
187extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
188struct nf_conntrack_protocol *
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800189__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800190{
Yasuyuki Kozakaiddc8d022006-02-04 02:12:14 -0800191 if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800192 return &nf_conntrack_generic_protocol;
193
194 return nf_ct_protos[l3proto][protocol];
195}
196
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800197/* this is guaranteed to always return a valid protocol helper, since
198 * it falls back to generic_protocol */
199struct nf_conntrack_protocol *
200nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
201{
202 struct nf_conntrack_protocol *p;
203
204 preempt_disable();
205 p = __nf_ct_proto_find(l3proto, protocol);
Yasuyuki Kozakaie1bbdeb2006-04-24 17:15:17 -0700206 if (!try_module_get(p->me))
207 p = &nf_conntrack_generic_protocol;
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800208 preempt_enable();
209
210 return p;
211}
212
213void nf_ct_proto_put(struct nf_conntrack_protocol *p)
214{
215 module_put(p->me);
216}
217
218struct nf_conntrack_l3proto *
219nf_ct_l3proto_find_get(u_int16_t l3proto)
220{
221 struct nf_conntrack_l3proto *p;
222
223 preempt_disable();
224 p = __nf_ct_l3proto_find(l3proto);
Yasuyuki Kozakaie1bbdeb2006-04-24 17:15:17 -0700225 if (!try_module_get(p->me))
226 p = &nf_conntrack_generic_l3proto;
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800227 preempt_enable();
228
229 return p;
230}
231
232void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
233{
234 module_put(p->me);
235}
236
Pablo Neira Ayusob9f78f92006-03-22 13:56:08 -0800237int
238nf_ct_l3proto_try_module_get(unsigned short l3proto)
239{
240 int ret;
241 struct nf_conntrack_l3proto *p;
242
243retry: p = nf_ct_l3proto_find_get(l3proto);
244 if (p == &nf_conntrack_generic_l3proto) {
245 ret = request_module("nf_conntrack-%d", l3proto);
246 if (!ret)
247 goto retry;
248
249 return -EPROTOTYPE;
250 }
251
252 return 0;
253}
254
255void nf_ct_l3proto_module_put(unsigned short l3proto)
256{
257 struct nf_conntrack_l3proto *p;
258
259 preempt_disable();
260 p = __nf_ct_l3proto_find(l3proto);
261 preempt_enable();
262
263 module_put(p->me);
264}
265
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800266static int nf_conntrack_hash_rnd_initted;
267static unsigned int nf_conntrack_hash_rnd;
268
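/* Hash a tuple into the conntrack table: jhash the L3 addresses keyed by the
 * L3/L4 protocol numbers, then the L4 ports, and fold both with a random
 * seed so the bucket distribution cannot easily be predicted. */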
269static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
270 unsigned int size, unsigned int rnd)
271{
272 unsigned int a, b;
273 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
274 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
275 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
276 (tuple->src.u.all << 16) | tuple->dst.u.all);
277
278 return jhash_2words(a, b, rnd) % size;
279}
280
281static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
282{
283 return __hash_conntrack(tuple, nf_conntrack_htable_size,
284 nf_conntrack_hash_rnd);
285}
286
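/* Register (or reuse) a slab cache for conntrack objects carrying the given
 * feature set.  Callers share caches via the use count; e.g.
 * nf_conntrack_helper_register() below registers "nf_conntrack:help" sized
 * for struct nf_conn plus struct nf_conn_help. */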
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800287int nf_conntrack_register_cache(u_int32_t features, const char *name,
Harald Weltedc808fe2006-03-20 17:56:32 -0800288 size_t size)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800289{
290 int ret = 0;
291 char *cache_name;
292 kmem_cache_t *cachep;
293
294 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
295 features, name, size);
296
297 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
298 DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
299 features);
300 return -EINVAL;
301 }
302
Ingo Molnar57b47a52006-03-20 22:35:41 -0800303 mutex_lock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800304
305 write_lock_bh(&nf_ct_cache_lock);
306 /* e.g: multiple helpers are loaded */
307 if (nf_ct_cache[features].use > 0) {
308 DEBUGP("nf_conntrack_register_cache: already registered.\n");
309 if ((!strncmp(nf_ct_cache[features].name, name,
310 NF_CT_FEATURES_NAMELEN))
Harald Weltedc808fe2006-03-20 17:56:32 -0800311 && nf_ct_cache[features].size == size) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800312 DEBUGP("nf_conntrack_register_cache: reusing.\n");
313 nf_ct_cache[features].use++;
314 ret = 0;
315 } else
316 ret = -EBUSY;
317
318 write_unlock_bh(&nf_ct_cache_lock);
Ingo Molnar57b47a52006-03-20 22:35:41 -0800319 mutex_unlock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800320 return ret;
321 }
322 write_unlock_bh(&nf_ct_cache_lock);
323
324 /*
325 * The memory holding the slab cache name must stay valid until
326 * the cache is destroyed.
327 */
328 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
329 if (cache_name == NULL) {
330 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
331 ret = -ENOMEM;
332 goto out_up_mutex;
333 }
334
335 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
336 >= NF_CT_FEATURES_NAMELEN) {
337 printk("nf_conntrack_register_cache: name too long\n");
338 ret = -EINVAL;
339 goto out_free_name;
340 }
341
342 cachep = kmem_cache_create(cache_name, size, 0, 0,
343 NULL, NULL);
344 if (!cachep) {
345 printk("nf_conntrack_register_cache: Can't create slab cache "
346 "for the features = 0x%x\n", features);
347 ret = -ENOMEM;
348 goto out_free_name;
349 }
350
351 write_lock_bh(&nf_ct_cache_lock);
352 nf_ct_cache[features].use = 1;
353 nf_ct_cache[features].size = size;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800354 nf_ct_cache[features].cachep = cachep;
355 nf_ct_cache[features].name = cache_name;
356 write_unlock_bh(&nf_ct_cache_lock);
357
358 goto out_up_mutex;
359
360out_free_name:
361 kfree(cache_name);
362out_up_mutex:
Ingo Molnar57b47a52006-03-20 22:35:41 -0800363 mutex_unlock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800364 return ret;
365}
366
367/* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */
368void nf_conntrack_unregister_cache(u_int32_t features)
369{
370 kmem_cache_t *cachep;
371 char *name;
372
373 /*
374 * This assures that kmem_cache_create() isn't called before destroying
375 * slab cache.
376 */
377 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
Ingo Molnar57b47a52006-03-20 22:35:41 -0800378 mutex_lock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800379
380 write_lock_bh(&nf_ct_cache_lock);
381 if (--nf_ct_cache[features].use > 0) {
382 write_unlock_bh(&nf_ct_cache_lock);
Ingo Molnar57b47a52006-03-20 22:35:41 -0800383 mutex_unlock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800384 return;
385 }
386 cachep = nf_ct_cache[features].cachep;
387 name = nf_ct_cache[features].name;
388 nf_ct_cache[features].cachep = NULL;
389 nf_ct_cache[features].name = NULL;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800390 nf_ct_cache[features].size = 0;
391 write_unlock_bh(&nf_ct_cache_lock);
392
393 synchronize_net();
394
395 kmem_cache_destroy(cachep);
396 kfree(name);
397
Ingo Molnar57b47a52006-03-20 22:35:41 -0800398 mutex_unlock(&nf_ct_cache_mutex);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800399}
400
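/* Fill in a tuple from a packet: the L3 protocol extracts the addresses at
 * nhoff, the L4 protocol extracts the ports/ids at dataoff.  Returns 0 if
 * either layer cannot parse the packet. */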
401int
402nf_ct_get_tuple(const struct sk_buff *skb,
403 unsigned int nhoff,
404 unsigned int dataoff,
405 u_int16_t l3num,
406 u_int8_t protonum,
407 struct nf_conntrack_tuple *tuple,
408 const struct nf_conntrack_l3proto *l3proto,
409 const struct nf_conntrack_protocol *protocol)
410{
411 NF_CT_TUPLE_U_BLANK(tuple);
412
413 tuple->src.l3num = l3num;
414 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
415 return 0;
416
417 tuple->dst.protonum = protonum;
418 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
419
420 return protocol->pkt_to_tuple(skb, dataoff, tuple);
421}
422
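/* Build the reply-direction tuple for "orig": the L3/L4 protocol handlers
 * swap addresses and ports, and the direction is flipped. */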
423int
424nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
425 const struct nf_conntrack_tuple *orig,
426 const struct nf_conntrack_l3proto *l3proto,
427 const struct nf_conntrack_protocol *protocol)
428{
429 NF_CT_TUPLE_U_BLANK(inverse);
430
431 inverse->src.l3num = orig->src.l3num;
432 if (l3proto->invert_tuple(inverse, orig) == 0)
433 return 0;
434
435 inverse->dst.dir = !orig->dst.dir;
436
437 inverse->dst.protonum = orig->dst.protonum;
438 return protocol->invert_tuple(inverse, orig);
439}
440
441/* nf_conntrack_expect helper functions */
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800442void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800443{
Harald Weltedc808fe2006-03-20 17:56:32 -0800444 struct nf_conn_help *master_help = nfct_help(exp->master);
445
446 NF_CT_ASSERT(master_help);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800447 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
Patrick McHardy4a59a812005-11-16 23:14:19 -0800448 NF_CT_ASSERT(!timer_pending(&exp->timeout));
Harald Weltedc808fe2006-03-20 17:56:32 -0800449
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800450 list_del(&exp->list);
451 NF_CT_STAT_INC(expect_delete);
Harald Weltedc808fe2006-03-20 17:56:32 -0800452 master_help->expecting--;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800453 nf_conntrack_expect_put(exp);
454}
455
456static void expectation_timed_out(unsigned long ul_expect)
457{
458 struct nf_conntrack_expect *exp = (void *)ul_expect;
459
460 write_lock_bh(&nf_conntrack_lock);
461 nf_ct_unlink_expect(exp);
462 write_unlock_bh(&nf_conntrack_lock);
463 nf_conntrack_expect_put(exp);
464}
465
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800466struct nf_conntrack_expect *
467__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
468{
469 struct nf_conntrack_expect *i;
470
471 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
472 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
473 atomic_inc(&i->use);
474 return i;
475 }
476 }
477 return NULL;
478}
479
480/* Just find an expectation corresponding to a tuple. */
481struct nf_conntrack_expect *
482nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
483{
484 struct nf_conntrack_expect *i;
485
486 read_lock_bh(&nf_conntrack_lock);
487 i = __nf_conntrack_expect_find(tuple);
488 read_unlock_bh(&nf_conntrack_lock);
489
490 return i;
491}
492
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800493/* If an expectation for this connection is found, it is deleted from the
494 * global list and then returned. */
495static struct nf_conntrack_expect *
496find_expectation(const struct nf_conntrack_tuple *tuple)
497{
498 struct nf_conntrack_expect *i;
499
500 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
501 /* If master is not in hash table yet (ie. packet hasn't left
502 this machine yet), how can other end know about expected?
503 Hence these are not the droids you are looking for (if
504 master ct never got confirmed, we'd hold a reference to it
505 and weird things would happen to future packets). */
506 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
507 && nf_ct_is_confirmed(i->master)) {
508 if (i->flags & NF_CT_EXPECT_PERMANENT) {
509 atomic_inc(&i->use);
510 return i;
511 } else if (del_timer(&i->timeout)) {
512 nf_ct_unlink_expect(i);
513 return i;
514 }
515 }
516 }
517 return NULL;
518}
519
520/* delete all expectations for this conntrack */
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800521void nf_ct_remove_expectations(struct nf_conn *ct)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800522{
523 struct nf_conntrack_expect *i, *tmp;
Harald Weltedc808fe2006-03-20 17:56:32 -0800524 struct nf_conn_help *help = nfct_help(ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800525
526 /* Optimization: most connections never expect any others. */
Harald Weltedc808fe2006-03-20 17:56:32 -0800527 if (!help || help->expecting == 0)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800528 return;
529
530 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
531 if (i->master == ct && del_timer(&i->timeout)) {
532 nf_ct_unlink_expect(i);
533 nf_conntrack_expect_put(i);
534 }
535 }
536}
537
538static void
539clean_from_lists(struct nf_conn *ct)
540{
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800541 DEBUGP("clean_from_lists(%p)\n", ct);
542 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
Patrick McHardydf0933d2006-09-20 11:57:53 -0700543 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
544 list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800545
546 /* Destroy all pending expectations */
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800547 nf_ct_remove_expectations(ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800548}
549
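/* Called when the last reference to a conntrack is dropped: notify event
 * listeners, let the L3/L4 protocols release their private state, remove
 * leftover expectations and unconfirmed-list linkage, then return the
 * entry to its slab cache. */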
550static void
551destroy_conntrack(struct nf_conntrack *nfct)
552{
553 struct nf_conn *ct = (struct nf_conn *)nfct;
554 struct nf_conntrack_l3proto *l3proto;
555 struct nf_conntrack_protocol *proto;
556
557 DEBUGP("destroy_conntrack(%p)\n", ct);
558 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
559 NF_CT_ASSERT(!timer_pending(&ct->timeout));
560
561 nf_conntrack_event(IPCT_DESTROY, ct);
562 set_bit(IPS_DYING_BIT, &ct->status);
563
564 /* To make sure we don't get any weird locking issues here:
565 * destroy_conntrack() MUST NOT be called with a write lock
566 * to nf_conntrack_lock!!! -HW */
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800567 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800568 if (l3proto && l3proto->destroy)
569 l3proto->destroy(ct);
570
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800571 proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800572 if (proto && proto->destroy)
573 proto->destroy(ct);
574
575 if (nf_conntrack_destroyed)
576 nf_conntrack_destroyed(ct);
577
578 write_lock_bh(&nf_conntrack_lock);
579 /* Expectations will have been removed in clean_from_lists,
580 * except TFTP can create an expectation on the first packet,
581 * before connection is in the list, so we need to clean here,
582 * too. */
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800583 nf_ct_remove_expectations(ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800584
585 /* We overload first tuple to link into unconfirmed list. */
586 if (!nf_ct_is_confirmed(ct)) {
587 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
588 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
589 }
590
591 NF_CT_STAT_INC(delete);
592 write_unlock_bh(&nf_conntrack_lock);
593
594 if (ct->master)
595 nf_ct_put(ct->master);
596
597 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
598 nf_conntrack_free(ct);
599}
600
601static void death_by_timeout(unsigned long ul_conntrack)
602{
603 struct nf_conn *ct = (void *)ul_conntrack;
604
605 write_lock_bh(&nf_conntrack_lock);
606 /* Inside lock so preempt is disabled on module removal path.
607 * Otherwise we can get spurious warnings. */
608 NF_CT_STAT_INC(delete_list);
609 clean_from_lists(ct);
610 write_unlock_bh(&nf_conntrack_lock);
611 nf_ct_put(ct);
612}
613
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800614struct nf_conntrack_tuple_hash *
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800615__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
616 const struct nf_conn *ignored_conntrack)
617{
618 struct nf_conntrack_tuple_hash *h;
619 unsigned int hash = hash_conntrack(tuple);
620
621 ASSERT_READ_LOCK(&nf_conntrack_lock);
622 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
Patrick McHardydf0933d2006-09-20 11:57:53 -0700623 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
624 nf_ct_tuple_equal(tuple, &h->tuple)) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800625 NF_CT_STAT_INC(found);
626 return h;
627 }
628 NF_CT_STAT_INC(searched);
629 }
630
631 return NULL;
632}
633
634/* Find a connection corresponding to a tuple. */
635struct nf_conntrack_tuple_hash *
636nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
637 const struct nf_conn *ignored_conntrack)
638{
639 struct nf_conntrack_tuple_hash *h;
640
641 read_lock_bh(&nf_conntrack_lock);
642 h = __nf_conntrack_find(tuple, ignored_conntrack);
643 if (h)
644 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
645 read_unlock_bh(&nf_conntrack_lock);
646
647 return h;
648}
649
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800650static void __nf_conntrack_hash_insert(struct nf_conn *ct,
651 unsigned int hash,
652 unsigned int repl_hash)
653{
654 ct->id = ++nf_conntrack_next_id;
Patrick McHardydf0933d2006-09-20 11:57:53 -0700655 list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
656 &nf_conntrack_hash[hash]);
657 list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
658 &nf_conntrack_hash[repl_hash]);
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800659}
660
661void nf_conntrack_hash_insert(struct nf_conn *ct)
662{
663 unsigned int hash, repl_hash;
664
665 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
666 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
667
668 write_lock_bh(&nf_conntrack_lock);
669 __nf_conntrack_hash_insert(ct, hash, repl_hash);
670 write_unlock_bh(&nf_conntrack_lock);
671}
672
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800673/* Confirm a connection given skb; places it in hash table */
674int
675__nf_conntrack_confirm(struct sk_buff **pskb)
676{
677 unsigned int hash, repl_hash;
Patrick McHardydf0933d2006-09-20 11:57:53 -0700678 struct nf_conntrack_tuple_hash *h;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800679 struct nf_conn *ct;
Patrick McHardydf0933d2006-09-20 11:57:53 -0700680 struct nf_conn_help *help;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800681 enum ip_conntrack_info ctinfo;
682
683 ct = nf_ct_get(*pskb, &ctinfo);
684
685 /* ipt_REJECT uses nf_conntrack_attach to attach related
686 ICMP/TCP RST packets in other direction. Actual packet
687 which created connection will be IP_CT_NEW or for an
688 expected connection, IP_CT_RELATED. */
689 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
690 return NF_ACCEPT;
691
692 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
693 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
694
695 /* We're not in hash table, and we refuse to set up related
696 connections for unconfirmed conns. But packet copies and
697 REJECT will give spurious warnings here. */
698 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
699
700 /* No external references means no one else could have
701 confirmed us. */
702 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
703 DEBUGP("Confirming conntrack %p\n", ct);
704
705 write_lock_bh(&nf_conntrack_lock);
706
707 /* See if there's one in the list already, including reverse:
708 NAT could have grabbed it without realizing, since we're
709 not in the hash. If there is, we lost race. */
Patrick McHardydf0933d2006-09-20 11:57:53 -0700710 list_for_each_entry(h, &nf_conntrack_hash[hash], list)
711 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
712 &h->tuple))
713 goto out;
714 list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
715 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
716 &h->tuple))
717 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800718
Patrick McHardydf0933d2006-09-20 11:57:53 -0700719 /* Remove from unconfirmed list */
720 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
721
722 __nf_conntrack_hash_insert(ct, hash, repl_hash);
723 /* Timer relative to confirmation time, not original
724 setting time, otherwise we'd get timer wrap in
725 weird delay cases. */
726 ct->timeout.expires += jiffies;
727 add_timer(&ct->timeout);
728 atomic_inc(&ct->ct_general.use);
729 set_bit(IPS_CONFIRMED_BIT, &ct->status);
730 NF_CT_STAT_INC(insert);
731 write_unlock_bh(&nf_conntrack_lock);
732 help = nfct_help(ct);
733 if (help && help->helper)
734 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800735#ifdef CONFIG_NF_NAT_NEEDED
Patrick McHardydf0933d2006-09-20 11:57:53 -0700736 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
737 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
738 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800739#endif
Patrick McHardydf0933d2006-09-20 11:57:53 -0700740 nf_conntrack_event_cache(master_ct(ct) ?
741 IPCT_RELATED : IPCT_NEW, *pskb);
742 return NF_ACCEPT;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800743
Patrick McHardydf0933d2006-09-20 11:57:53 -0700744out:
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800745 NF_CT_STAT_INC(insert_failed);
746 write_unlock_bh(&nf_conntrack_lock);
747 return NF_DROP;
748}
749
750/* Returns true if a connection corresponds to the tuple (required
751 for NAT). */
752int
753nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
754 const struct nf_conn *ignored_conntrack)
755{
756 struct nf_conntrack_tuple_hash *h;
757
758 read_lock_bh(&nf_conntrack_lock);
759 h = __nf_conntrack_find(tuple, ignored_conntrack);
760 read_unlock_bh(&nf_conntrack_lock);
761
762 return h != NULL;
763}
764
765/* There's a small race here where we may free a just-assured
766 connection. Too bad: we're in trouble anyway. */
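/* Evict one entry from this hash chain to make room: walk the chain
 * backwards (roughly LRU) and drop the first conntrack that is not yet
 * ASSURED.  Returns 1 if something was dropped. */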
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800767static int early_drop(struct list_head *chain)
768{
769 /* Traverse backwards: gives us oldest, which is roughly LRU */
770 struct nf_conntrack_tuple_hash *h;
Patrick McHardydf0933d2006-09-20 11:57:53 -0700771 struct nf_conn *ct = NULL, *tmp;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800772 int dropped = 0;
773
774 read_lock_bh(&nf_conntrack_lock);
Patrick McHardydf0933d2006-09-20 11:57:53 -0700775 list_for_each_entry_reverse(h, chain, list) {
776 tmp = nf_ct_tuplehash_to_ctrack(h);
777 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
778 ct = tmp;
779 atomic_inc(&ct->ct_general.use);
780 break;
781 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800782 }
783 read_unlock_bh(&nf_conntrack_lock);
784
785 if (!ct)
786 return dropped;
787
788 if (del_timer(&ct->timeout)) {
789 death_by_timeout((unsigned long)ct);
790 dropped = 1;
791 NF_CT_STAT_INC(early_drop);
792 }
793 nf_ct_put(ct);
794 return dropped;
795}
796
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800797static struct nf_conntrack_helper *
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800798__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800799{
Patrick McHardydf0933d2006-09-20 11:57:53 -0700800 struct nf_conntrack_helper *h;
801
802 list_for_each_entry(h, &helpers, list) {
803 if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
804 return h;
805 }
806 return NULL;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800807}
808
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800809struct nf_conntrack_helper *
810nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
811{
812 struct nf_conntrack_helper *helper;
813
814 /* need nf_conntrack_lock to assure that helper exists until
815 * try_module_get() is called */
816 read_lock_bh(&nf_conntrack_lock);
817
818 helper = __nf_ct_helper_find(tuple);
819 if (helper) {
820 /* need to increase module usage count to assure helper will
821 * not go away while the caller is e.g. busy putting a
822 * conntrack in the hash that uses the helper */
823 if (!try_module_get(helper->me))
824 helper = NULL;
825 }
826
827 read_unlock_bh(&nf_conntrack_lock);
828
829 return helper;
830}
831
832void nf_ct_helper_put(struct nf_conntrack_helper *helper)
833{
834 module_put(helper->me);
835}
836
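/* Allocate a conntrack entry: enforce nf_conntrack_max (trying early_drop()
 * on the target hash chain first), then pick the slab cache whose feature
 * set (e.g. NF_CT_F_HELP when a helper matches the reply tuple) fits this
 * connection. */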
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800837static struct nf_conn *
838__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
839 const struct nf_conntrack_tuple *repl,
840 const struct nf_conntrack_l3proto *l3proto)
841{
842 struct nf_conn *conntrack = NULL;
843 u_int32_t features = 0;
Harald Weltedc808fe2006-03-20 17:56:32 -0800844 struct nf_conntrack_helper *helper;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800845
Harald Weltedc808fe2006-03-20 17:56:32 -0800846 if (unlikely(!nf_conntrack_hash_rnd_initted)) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800847 get_random_bytes(&nf_conntrack_hash_rnd, 4);
848 nf_conntrack_hash_rnd_initted = 1;
849 }
850
851 if (nf_conntrack_max
852 && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
853 unsigned int hash = hash_conntrack(orig);
854 /* Try dropping from this hash chain. */
855 if (!early_drop(&nf_conntrack_hash[hash])) {
856 if (net_ratelimit())
857 printk(KERN_WARNING
858 "nf_conntrack: table full, dropping"
859 " packet.\n");
860 return ERR_PTR(-ENOMEM);
861 }
862 }
863
864 /* find features needed by this conntrack. */
865 features = l3proto->get_features(orig);
Harald Weltedc808fe2006-03-20 17:56:32 -0800866
867 /* FIXME: protect helper list per RCU */
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800868 read_lock_bh(&nf_conntrack_lock);
Harald Weltedc808fe2006-03-20 17:56:32 -0800869 helper = __nf_ct_helper_find(repl);
870 if (helper)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800871 features |= NF_CT_F_HELP;
872 read_unlock_bh(&nf_conntrack_lock);
873
874 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
875
876 read_lock_bh(&nf_ct_cache_lock);
877
Harald Weltedc808fe2006-03-20 17:56:32 -0800878 if (unlikely(!nf_ct_cache[features].use)) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800879 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
880 features);
881 goto out;
882 }
883
884 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
885 if (conntrack == NULL) {
886 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
887 goto out;
888 }
889
890 memset(conntrack, 0, nf_ct_cache[features].size);
891 conntrack->features = features;
Harald Weltedc808fe2006-03-20 17:56:32 -0800892 if (helper) {
893 struct nf_conn_help *help = nfct_help(conntrack);
894 NF_CT_ASSERT(help);
895 help->helper = helper;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800896 }
897
898 atomic_set(&conntrack->ct_general.use, 1);
899 conntrack->ct_general.destroy = destroy_conntrack;
900 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
901 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
902 /* Don't set timer yet: wait for confirmation */
903 init_timer(&conntrack->timeout);
904 conntrack->timeout.data = (unsigned long)conntrack;
905 conntrack->timeout.function = death_by_timeout;
906
907 atomic_inc(&nf_conntrack_count);
908out:
909 read_unlock_bh(&nf_ct_cache_lock);
910 return conntrack;
911}
912
913struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
914 const struct nf_conntrack_tuple *repl)
915{
916 struct nf_conntrack_l3proto *l3proto;
917
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -0800918 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800919 return __nf_conntrack_alloc(orig, repl, l3proto);
920}
921
922void nf_conntrack_free(struct nf_conn *conntrack)
923{
924 u_int32_t features = conntrack->features;
925 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
926 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
927 conntrack);
928 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
929 atomic_dec(&nf_conntrack_count);
930}
931
932/* Allocate a new conntrack: we return -ENOMEM if classification
933 failed due to stress. Otherwise it really is unclassifiable. */
934static struct nf_conntrack_tuple_hash *
935init_conntrack(const struct nf_conntrack_tuple *tuple,
936 struct nf_conntrack_l3proto *l3proto,
937 struct nf_conntrack_protocol *protocol,
938 struct sk_buff *skb,
939 unsigned int dataoff)
940{
941 struct nf_conn *conntrack;
942 struct nf_conntrack_tuple repl_tuple;
943 struct nf_conntrack_expect *exp;
944
945 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
946 DEBUGP("Can't invert tuple.\n");
947 return NULL;
948 }
949
950 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
951 if (conntrack == NULL || IS_ERR(conntrack)) {
952 DEBUGP("Can't allocate conntrack.\n");
953 return (struct nf_conntrack_tuple_hash *)conntrack;
954 }
955
956 if (!protocol->new(conntrack, skb, dataoff)) {
957 nf_conntrack_free(conntrack);
958 DEBUGP("init conntrack: can't track with proto module\n");
959 return NULL;
960 }
961
962 write_lock_bh(&nf_conntrack_lock);
963 exp = find_expectation(tuple);
964
965 if (exp) {
966 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
967 conntrack, exp);
968 /* Welcome, Mr. Bond. We've been expecting you... */
969 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
970 conntrack->master = exp->master;
971#ifdef CONFIG_NF_CONNTRACK_MARK
972 conntrack->mark = exp->master->mark;
973#endif
James Morris7c9728c2006-06-09 00:31:46 -0700974#ifdef CONFIG_NF_CONNTRACK_SECMARK
975 conntrack->secmark = exp->master->secmark;
976#endif
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800977 nf_conntrack_get(&conntrack->master->ct_general);
978 NF_CT_STAT_INC(expect_new);
Harald Weltedc808fe2006-03-20 17:56:32 -0800979 } else
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800980 NF_CT_STAT_INC(new);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -0800981
982 /* Overload tuple linked list to put us in unconfirmed list. */
983 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
984
985 write_unlock_bh(&nf_conntrack_lock);
986
987 if (exp) {
988 if (exp->expectfn)
989 exp->expectfn(conntrack, exp);
990 nf_conntrack_expect_put(exp);
991 }
992
993 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
994}
995
996/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
997static inline struct nf_conn *
998resolve_normal_ct(struct sk_buff *skb,
999 unsigned int dataoff,
1000 u_int16_t l3num,
1001 u_int8_t protonum,
1002 struct nf_conntrack_l3proto *l3proto,
1003 struct nf_conntrack_protocol *proto,
1004 int *set_reply,
1005 enum ip_conntrack_info *ctinfo)
1006{
1007 struct nf_conntrack_tuple tuple;
1008 struct nf_conntrack_tuple_hash *h;
1009 struct nf_conn *ct;
1010
1011 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
1012 dataoff, l3num, protonum, &tuple, l3proto,
1013 proto)) {
1014 DEBUGP("resolve_normal_ct: Can't get tuple\n");
1015 return NULL;
1016 }
1017
1018 /* look for tuple match */
1019 h = nf_conntrack_find_get(&tuple, NULL);
1020 if (!h) {
1021 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
1022 if (!h)
1023 return NULL;
1024 if (IS_ERR(h))
1025 return (void *)h;
1026 }
1027 ct = nf_ct_tuplehash_to_ctrack(h);
1028
1029 /* It exists; we have (non-exclusive) reference. */
1030 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1031 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
1032 /* Please set the reply bit if this packet is OK */
1033 *set_reply = 1;
1034 } else {
1035 /* Once we've had two way comms, always ESTABLISHED. */
1036 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1037 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
1038 *ctinfo = IP_CT_ESTABLISHED;
1039 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1040 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
1041 *ctinfo = IP_CT_RELATED;
1042 } else {
1043 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
1044 *ctinfo = IP_CT_NEW;
1045 }
1046 *set_reply = 0;
1047 }
1048 skb->nfct = &ct->ct_general;
1049 skb->nfctinfo = *ctinfo;
1050 return ct;
1051}
1052
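/* Main conntrack hook, called for every packet: parse the L3/L4 headers,
 * look up or create the conntrack entry, let the L4 protocol update its
 * state, and cache a status event when the first reply is seen. */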
1053unsigned int
1054nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
1055{
1056 struct nf_conn *ct;
1057 enum ip_conntrack_info ctinfo;
1058 struct nf_conntrack_l3proto *l3proto;
1059 struct nf_conntrack_protocol *proto;
1060 unsigned int dataoff;
1061 u_int8_t protonum;
1062 int set_reply = 0;
1063 int ret;
1064
1065 /* Previously seen (loopback or untracked)? Ignore. */
1066 if ((*pskb)->nfct) {
1067 NF_CT_STAT_INC(ignore);
1068 return NF_ACCEPT;
1069 }
1070
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001071 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001072 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
1073 DEBUGP("not prepared to track yet or error occurred\n");
1074 return -ret;
1075 }
1076
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001077 proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001078
1079 /* It may be a special packet, error, unclean...
1080 * inverse of the return code tells the netfilter
1081 * core what to do with the packet. */
1082 if (proto->error != NULL &&
1083 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
1084 NF_CT_STAT_INC(error);
1085 NF_CT_STAT_INC(invalid);
1086 return -ret;
1087 }
1088
1089 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
1090 &set_reply, &ctinfo);
1091 if (!ct) {
1092 /* Not valid part of a connection */
1093 NF_CT_STAT_INC(invalid);
1094 return NF_ACCEPT;
1095 }
1096
1097 if (IS_ERR(ct)) {
1098 /* Too stressed to deal. */
1099 NF_CT_STAT_INC(drop);
1100 return NF_DROP;
1101 }
1102
1103 NF_CT_ASSERT((*pskb)->nfct);
1104
1105 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
1106 if (ret < 0) {
1107 /* Invalid: inverse of the return code tells
1108 * the netfilter core what to do */
1109 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
1110 nf_conntrack_put((*pskb)->nfct);
1111 (*pskb)->nfct = NULL;
1112 NF_CT_STAT_INC(invalid);
1113 return -ret;
1114 }
1115
1116 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1117 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
1118
1119 return ret;
1120}
1121
1122int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1123 const struct nf_conntrack_tuple *orig)
1124{
1125 return nf_ct_invert_tuple(inverse, orig,
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001126 __nf_ct_l3proto_find(orig->src.l3num),
1127 __nf_ct_proto_find(orig->src.l3num,
1128 orig->dst.protonum));
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001129}
1130
1131/* Would two expected things clash? */
1132static inline int expect_clash(const struct nf_conntrack_expect *a,
1133 const struct nf_conntrack_expect *b)
1134{
1135 /* Part covered by intersection of masks must be unequal,
1136 otherwise they clash */
1137 struct nf_conntrack_tuple intersect_mask;
1138 int count;
1139
1140 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1141 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1142 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1143 intersect_mask.dst.protonum = a->mask.dst.protonum
1144 & b->mask.dst.protonum;
1145
1146 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1147 intersect_mask.src.u3.all[count] =
1148 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1149 }
1150
1151 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1152 intersect_mask.dst.u3.all[count] =
1153 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1154 }
1155
1156 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1157}
1158
1159static inline int expect_matches(const struct nf_conntrack_expect *a,
1160 const struct nf_conntrack_expect *b)
1161{
1162 return a->master == b->master
1163 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1164 && nf_ct_tuple_equal(&a->mask, &b->mask);
1165}
1166
1167/* Generally a bad idea to call this: could have matched already. */
1168void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1169{
1170 struct nf_conntrack_expect *i;
1171
1172 write_lock_bh(&nf_conntrack_lock);
1173 /* choose the oldest expectation to evict */
1174 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1175 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1176 nf_ct_unlink_expect(i);
1177 write_unlock_bh(&nf_conntrack_lock);
1178 nf_conntrack_expect_put(i);
1179 return;
1180 }
1181 }
1182 write_unlock_bh(&nf_conntrack_lock);
1183}
1184
1185/* We don't increase the master conntrack refcount for non-fulfilled
1186 * conntracks. During the conntrack destruction, the expectations are
1187 * always killed before the conntrack itself */
1188struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1189{
1190 struct nf_conntrack_expect *new;
1191
1192 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1193 if (!new) {
1194 DEBUGP("expect_related: OOM allocating expect\n");
1195 return NULL;
1196 }
1197 new->master = me;
1198 atomic_set(&new->use, 1);
1199 return new;
1200}
1201
1202void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1203{
1204 if (atomic_dec_and_test(&exp->use))
1205 kmem_cache_free(nf_conntrack_expect_cachep, exp);
1206}
1207
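/* Link a new expectation into the global list, account it on the master's
 * helper and start its timeout (helper->timeout seconds). */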
1208static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1209{
Harald Weltedc808fe2006-03-20 17:56:32 -08001210 struct nf_conn_help *master_help = nfct_help(exp->master);
1211
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001212 atomic_inc(&exp->use);
Harald Weltedc808fe2006-03-20 17:56:32 -08001213 master_help->expecting++;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001214 list_add(&exp->list, &nf_conntrack_expect_list);
1215
1216 init_timer(&exp->timeout);
1217 exp->timeout.data = (unsigned long)exp;
1218 exp->timeout.function = expectation_timed_out;
Harald Weltedc808fe2006-03-20 17:56:32 -08001219 exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001220 add_timer(&exp->timeout);
1221
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001222 exp->id = ++nf_conntrack_expect_next_id;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001223 atomic_inc(&exp->use);
1224 NF_CT_STAT_INC(expect_create);
1225}
1226
1227/* Race with expectations being used means we could have none to find; OK. */
1228static void evict_oldest_expect(struct nf_conn *master)
1229{
1230 struct nf_conntrack_expect *i;
1231
1232 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1233 if (i->master == master) {
1234 if (del_timer(&i->timeout)) {
1235 nf_ct_unlink_expect(i);
1236 nf_conntrack_expect_put(i);
1237 }
1238 break;
1239 }
1240 }
1241}
1242
1243static inline int refresh_timer(struct nf_conntrack_expect *i)
1244{
Harald Weltedc808fe2006-03-20 17:56:32 -08001245 struct nf_conn_help *master_help = nfct_help(i->master);
1246
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001247 if (!del_timer(&i->timeout))
1248 return 0;
1249
Harald Weltedc808fe2006-03-20 17:56:32 -08001250 i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001251 add_timer(&i->timeout);
1252 return 1;
1253}
1254
1255int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1256{
1257 struct nf_conntrack_expect *i;
Jesper Juhld695aa82006-01-05 12:16:16 -08001258 struct nf_conn *master = expect->master;
Harald Weltedc808fe2006-03-20 17:56:32 -08001259 struct nf_conn_help *master_help = nfct_help(master);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001260 int ret;
1261
Harald Weltedc808fe2006-03-20 17:56:32 -08001262 NF_CT_ASSERT(master_help);
1263
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001264 DEBUGP("nf_conntrack_expect_related %p\n", expect);
1265 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1266 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1267
1268 write_lock_bh(&nf_conntrack_lock);
1269 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1270 if (expect_matches(i, expect)) {
1271 /* Refresh timer: if it's dying, ignore.. */
1272 if (refresh_timer(i)) {
1273 ret = 0;
1274 goto out;
1275 }
1276 } else if (expect_clash(i, expect)) {
1277 ret = -EBUSY;
1278 goto out;
1279 }
1280 }
1281 /* Will be over limit? */
Harald Weltedc808fe2006-03-20 17:56:32 -08001282 if (master_help->helper->max_expected &&
1283 master_help->expecting >= master_help->helper->max_expected)
Jesper Juhld695aa82006-01-05 12:16:16 -08001284 evict_oldest_expect(master);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001285
1286 nf_conntrack_expect_insert(expect);
1287 nf_conntrack_expect_event(IPEXP_NEW, expect);
1288 ret = 0;
1289out:
1290 write_unlock_bh(&nf_conntrack_lock);
1291 return ret;
1292}
1293
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001294int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1295{
1296 int ret;
1297 BUG_ON(me->timeout == 0);
1298
1299 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1300 sizeof(struct nf_conn)
Harald Weltedc808fe2006-03-20 17:56:32 -08001301 + sizeof(struct nf_conn_help)
1302 + __alignof__(struct nf_conn_help));
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001303 if (ret < 0) {
1304 printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
1305 return ret;
1306 }
1307 write_lock_bh(&nf_conntrack_lock);
Patrick McHardydf0933d2006-09-20 11:57:53 -07001308 list_add(&me->list, &helpers);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001309 write_unlock_bh(&nf_conntrack_lock);
1310
1311 return 0;
1312}
1313
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001314struct nf_conntrack_helper *
1315__nf_conntrack_helper_find_byname(const char *name)
1316{
1317 struct nf_conntrack_helper *h;
1318
1319 list_for_each_entry(h, &helpers, list) {
1320 if (!strcmp(h->name, name))
1321 return h;
1322 }
1323
1324 return NULL;
1325}
1326
Patrick McHardydf0933d2006-09-20 11:57:53 -07001327static inline void unhelp(struct nf_conntrack_tuple_hash *i,
1328 const struct nf_conntrack_helper *me)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001329{
Harald Weltedc808fe2006-03-20 17:56:32 -08001330 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
1331 struct nf_conn_help *help = nfct_help(ct);
1332
1333 if (help && help->helper == me) {
1334 nf_conntrack_event(IPCT_HELPER, ct);
1335 help->helper = NULL;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001336 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001337}
1338
1339void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1340{
1341 unsigned int i;
Patrick McHardydf0933d2006-09-20 11:57:53 -07001342 struct nf_conntrack_tuple_hash *h;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001343 struct nf_conntrack_expect *exp, *tmp;
1344
1345 /* Need write lock here, to delete helper. */
1346 write_lock_bh(&nf_conntrack_lock);
Patrick McHardydf0933d2006-09-20 11:57:53 -07001347 list_del(&me->list);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001348
1349 /* Get rid of expectations */
1350 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
Harald Weltedc808fe2006-03-20 17:56:32 -08001351 struct nf_conn_help *help = nfct_help(exp->master);
1352 if (help->helper == me && del_timer(&exp->timeout)) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001353 nf_ct_unlink_expect(exp);
1354 nf_conntrack_expect_put(exp);
1355 }
1356 }
1357
1358 /* Get rid of expecteds, set helpers to NULL. */
Patrick McHardydf0933d2006-09-20 11:57:53 -07001359 list_for_each_entry(h, &unconfirmed, list)
1360 unhelp(h, me);
1361 for (i = 0; i < nf_conntrack_htable_size; i++) {
1362 list_for_each_entry(h, &nf_conntrack_hash[i], list)
1363 unhelp(h, me);
1364 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001365 write_unlock_bh(&nf_conntrack_lock);
1366
1367 /* Someone could be still looking at the helper in a bh. */
1368 synchronize_net();
1369}
1370
1371/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1372void __nf_ct_refresh_acct(struct nf_conn *ct,
1373 enum ip_conntrack_info ctinfo,
1374 const struct sk_buff *skb,
1375 unsigned long extra_jiffies,
1376 int do_acct)
1377{
1378 int event = 0;
1379
1380 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1381 NF_CT_ASSERT(skb);
1382
1383 write_lock_bh(&nf_conntrack_lock);
1384
Eric Leblond997ae832006-05-29 18:24:20 -07001385 /* Only update if this is not a fixed timeout */
1386 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
1387 write_unlock_bh(&nf_conntrack_lock);
1388 return;
1389 }
1390
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001391 /* If not in hash table, timer will not be active yet */
1392 if (!nf_ct_is_confirmed(ct)) {
1393 ct->timeout.expires = extra_jiffies;
1394 event = IPCT_REFRESH;
1395 } else {
1396 /* Need del_timer for race avoidance (may already be dying). */
1397 if (del_timer(&ct->timeout)) {
1398 ct->timeout.expires = jiffies + extra_jiffies;
1399 add_timer(&ct->timeout);
1400 event = IPCT_REFRESH;
1401 }
1402 }
1403
1404#ifdef CONFIG_NF_CT_ACCT
1405 if (do_acct) {
1406 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1407 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1408 skb->len - (unsigned int)(skb->nh.raw - skb->data);
1409 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1410 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1411 event |= IPCT_COUNTER_FILLING;
1412 }
1413#endif
1414
1415 write_unlock_bh(&nf_conntrack_lock);
1416
1417 /* must be unlocked when calling event cache */
1418 if (event)
1419 nf_conntrack_event_cache(event, skb);
1420}
1421
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001422#if defined(CONFIG_NF_CT_NETLINK) || \
1423 defined(CONFIG_NF_CT_NETLINK_MODULE)
1424
1425#include <linux/netfilter/nfnetlink.h>
1426#include <linux/netfilter/nfnetlink_conntrack.h>
Ingo Molnar57b47a52006-03-20 22:35:41 -08001427#include <linux/mutex.h>
1428
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001429
1430/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1431 * in ip_conntrack_core, since we don't want the protocols to autoload
1432 * or depend on ctnetlink */
1433int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1434 const struct nf_conntrack_tuple *tuple)
1435{
1436 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
1437 &tuple->src.u.tcp.port);
1438 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
1439 &tuple->dst.u.tcp.port);
1440 return 0;
1441
1442nfattr_failure:
1443 return -1;
1444}
1445
1446static const size_t cta_min_proto[CTA_PROTO_MAX] = {
1447 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
1448 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
1449};
1450
1451int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1452 struct nf_conntrack_tuple *t)
1453{
1454 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1455 return -EINVAL;
1456
1457 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
1458 return -EINVAL;
1459
1460 t->src.u.tcp.port =
1461 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1462 t->dst.u.tcp.port =
1463 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1464
1465 return 0;
1466}
1467#endif
1468
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001469/* Used by ipt_REJECT and ip6t_REJECT. */
1470void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1471{
1472 struct nf_conn *ct;
1473 enum ip_conntrack_info ctinfo;
1474
1475 /* This ICMP is in reverse direction to the packet which caused it */
1476 ct = nf_ct_get(skb, &ctinfo);
1477 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1478 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1479 else
1480 ctinfo = IP_CT_RELATED;
1481
1482 /* Attach to new skbuff, and increment count */
1483 nskb->nfct = &ct->ct_general;
1484 nskb->nfctinfo = ctinfo;
1485 nf_conntrack_get(nskb->nfct);
1486}
1487
1488static inline int
1489do_iter(const struct nf_conntrack_tuple_hash *i,
1490 int (*iter)(struct nf_conn *i, void *data),
1491 void *data)
1492{
1493 return iter(nf_ct_tuplehash_to_ctrack(i), data);
1494}
1495
1496/* Bring out ya dead! */
Patrick McHardydf0933d2006-09-20 11:57:53 -07001497static struct nf_conn *
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001498get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1499 void *data, unsigned int *bucket)
1500{
Patrick McHardydf0933d2006-09-20 11:57:53 -07001501 struct nf_conntrack_tuple_hash *h;
1502 struct nf_conn *ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001503
1504 write_lock_bh(&nf_conntrack_lock);
1505 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
Patrick McHardydf0933d2006-09-20 11:57:53 -07001506 list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
1507 ct = nf_ct_tuplehash_to_ctrack(h);
1508 if (iter(ct, data))
1509 goto found;
1510 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001511 }
Patrick McHardydf0933d2006-09-20 11:57:53 -07001512 list_for_each_entry(h, &unconfirmed, list) {
1513 ct = nf_ct_tuplehash_to_ctrack(h);
1514 if (iter(ct, data))
1515 goto found;
1516 }
1517 return NULL;
1518found:
1519 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001520 write_unlock_bh(&nf_conntrack_lock);
Patrick McHardydf0933d2006-09-20 11:57:53 -07001521 return ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001522}
1523
1524void
1525nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
1526{
Patrick McHardydf0933d2006-09-20 11:57:53 -07001527 struct nf_conn *ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001528 unsigned int bucket = 0;
1529
Patrick McHardydf0933d2006-09-20 11:57:53 -07001530 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001531 /* Time to push up daisies... */
1532 if (del_timer(&ct->timeout))
1533 death_by_timeout((unsigned long)ct);
1534 /* ... else the timer will get him soon. */
1535
1536 nf_ct_put(ct);
1537 }
1538}
1539
1540static int kill_all(struct nf_conn *i, void *data)
1541{
1542 return 1;
1543}
1544
1545static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1546{
1547 if (vmalloced)
1548 vfree(hash);
1549 else
1550 free_pages((unsigned long)hash,
1551 get_order(sizeof(struct list_head) * size));
1552}
1553
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001554void nf_conntrack_flush()
1555{
1556 nf_ct_iterate_cleanup(kill_all, NULL);
1557}
1558
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001559/* Mishearing the voices in his head, our hero wonders how he's
1560 supposed to kill the mall. */
1561void nf_conntrack_cleanup(void)
1562{
1563 int i;
1564
Yasuyuki Kozakai7d3cdc62006-02-15 15:22:21 -08001565 ip_ct_attach = NULL;
1566
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001567 /* This makes sure all current packets have passed through
1568 netfilter framework. Roll on, two-stage module
1569 delete... */
1570 synchronize_net();
1571
1572 nf_ct_event_cache_flush();
1573 i_see_dead_people:
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001574 nf_conntrack_flush();
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001575 if (atomic_read(&nf_conntrack_count) != 0) {
1576 schedule();
1577 goto i_see_dead_people;
1578 }
Patrick McHardy66365682005-12-05 13:36:50 -08001579 /* wait until all references to nf_conntrack_untracked are dropped */
1580 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1581 schedule();
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001582
1583 for (i = 0; i < NF_CT_F_NUM; i++) {
1584 if (nf_ct_cache[i].use == 0)
1585 continue;
1586
1587 NF_CT_ASSERT(nf_ct_cache[i].use == 1);
1588 nf_ct_cache[i].use = 1;
1589 nf_conntrack_unregister_cache(i);
1590 }
1591 kmem_cache_destroy(nf_conntrack_expect_cachep);
1592 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1593 nf_conntrack_htable_size);
KOVACS Krisztian5a6f2942005-11-15 16:47:34 -08001594
1595 /* free l3proto protocol tables */
1596 for (i = 0; i < PF_MAX; i++)
1597 if (nf_ct_protos[i]) {
1598 kfree(nf_ct_protos[i]);
1599 nf_ct_protos[i] = NULL;
1600 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001601}
1602
1603static struct list_head *alloc_hashtable(int size, int *vmalloced)
1604{
1605 struct list_head *hash;
1606 unsigned int i;
1607
1608 *vmalloced = 0;
1609 hash = (void*)__get_free_pages(GFP_KERNEL,
1610 get_order(sizeof(struct list_head)
1611 * size));
1612 if (!hash) {
1613 *vmalloced = 1;
1614 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1615 hash = vmalloc(sizeof(struct list_head) * size);
1616 }
1617
1618 if (hash)
1619 for (i = 0; i < size; i++)
1620 INIT_LIST_HEAD(&hash[i]);
1621
1622 return hash;
1623}
1624
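/* Resize the conntrack hash table at runtime (module parameter "hashsize"):
 * allocate a new table, rehash every entry under the write lock with a fresh
 * random seed, then free the old table. */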
1625int set_hashsize(const char *val, struct kernel_param *kp)
1626{
1627 int i, bucket, hashsize, vmalloced;
1628 int old_vmalloced, old_size;
1629 int rnd;
1630 struct list_head *hash, *old_hash;
1631 struct nf_conntrack_tuple_hash *h;
1632
1633 /* On boot, we can set this without any fancy locking. */
1634 if (!nf_conntrack_htable_size)
1635 return param_set_uint(val, kp);
1636
1637 hashsize = simple_strtol(val, NULL, 0);
1638 if (!hashsize)
1639 return -EINVAL;
1640
1641 hash = alloc_hashtable(hashsize, &vmalloced);
1642 if (!hash)
1643 return -ENOMEM;
1644
1645 /* We have to rehash for the new table anyway, so we can also
1646 * use a new random seed */
1647 get_random_bytes(&rnd, 4);
1648
1649 write_lock_bh(&nf_conntrack_lock);
1650 for (i = 0; i < nf_conntrack_htable_size; i++) {
1651 while (!list_empty(&nf_conntrack_hash[i])) {
1652 h = list_entry(nf_conntrack_hash[i].next,
1653 struct nf_conntrack_tuple_hash, list);
1654 list_del(&h->list);
1655 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1656 list_add_tail(&h->list, &hash[bucket]);
1657 }
1658 }
1659 old_size = nf_conntrack_htable_size;
1660 old_vmalloced = nf_conntrack_vmalloc;
1661 old_hash = nf_conntrack_hash;
1662
1663 nf_conntrack_htable_size = hashsize;
1664 nf_conntrack_vmalloc = vmalloced;
1665 nf_conntrack_hash = hash;
1666 nf_conntrack_hash_rnd = rnd;
1667 write_unlock_bh(&nf_conntrack_lock);
1668
1669 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1670 return 0;
1671}
1672
1673module_param_call(hashsize, set_hashsize, param_get_uint,
1674 &nf_conntrack_htable_size, 0600);
1675
1676int __init nf_conntrack_init(void)
1677{
1678 unsigned int i;
1679 int ret;
1680
1681 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1682 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1683 if (!nf_conntrack_htable_size) {
1684 nf_conntrack_htable_size
1685 = (((num_physpages << PAGE_SHIFT) / 16384)
1686 / sizeof(struct list_head));
1687 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1688 nf_conntrack_htable_size = 8192;
1689 if (nf_conntrack_htable_size < 16)
1690 nf_conntrack_htable_size = 16;
1691 }
1692 nf_conntrack_max = 8 * nf_conntrack_htable_size;
1693
1694 printk("nf_conntrack version %s (%u buckets, %d max)\n",
1695 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1696 nf_conntrack_max);
1697
1698 nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
1699 &nf_conntrack_vmalloc);
1700 if (!nf_conntrack_hash) {
1701 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1702 goto err_out;
1703 }
1704
1705 ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
Harald Weltedc808fe2006-03-20 17:56:32 -08001706 sizeof(struct nf_conn));
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001707 if (ret < 0) {
1708 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1709 goto err_free_hash;
1710 }
1711
1712 nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
1713 sizeof(struct nf_conntrack_expect),
1714 0, 0, NULL, NULL);
1715 if (!nf_conntrack_expect_cachep) {
1716 printk(KERN_ERR "Unable to create nf_expect slab cache\n");
1717 goto err_free_conntrack_slab;
1718 }
1719
1720 /* Don't NEED lock here, but good form anyway. */
1721 write_lock_bh(&nf_conntrack_lock);
1722 for (i = 0; i < PF_MAX; i++)
1723 nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
1724 write_unlock_bh(&nf_conntrack_lock);
1725
Yasuyuki Kozakai7d3cdc62006-02-15 15:22:21 -08001726 /* For use by REJECT target */
1727 ip_ct_attach = __nf_conntrack_attach;
1728
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001729 /* Set up fake conntrack:
1730 - to never be deleted, not in any hashes */
1731 atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
1732 /* - and make it look like a confirmed connection */
1733 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
1734
1735 return ret;
1736
1737err_free_conntrack_slab:
1738 nf_conntrack_unregister_cache(NF_CT_F_BASIC);
1739err_free_hash:
1740 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1741 nf_conntrack_htable_size);
1742err_out:
1743 return -ENOMEM;
1744}