blob: fb50fc0ac19fc2e4f7bb529408d8f670c3da9515 [file] [log] [blame]
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 */
11#include <linux/config.h>
12#include <linux/cache.h>
13#include <linux/skbuff.h>
14#include <linux/kmod.h>
15#include <linux/vmalloc.h>
16#include <linux/netdevice.h>
17#include <linux/module.h>
18#include <linux/tcp.h>
19#include <linux/udp.h>
20#include <linux/icmp.h>
21#include <net/ip.h>
22#include <asm/uaccess.h>
23#include <asm/semaphore.h>
24#include <linux/proc_fs.h>
25
26#include <linux/netfilter_ipv4/ip_tables.h>
27
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf: debug output on the per-packet fast path; compiled away
 * unless DEBUG_IP_FIREWALL is defined. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf: debug output on the user-context (ruleset load/read)
 * path; compiled away unless DEBUG_IP_FIREWALL_USER is defined. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* Soft assertion: logs a message when the condition fails but does
 * not stop execution.  No-op unless CONFIG_NETFILTER_DEBUG is set. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of the CPU cache-line size; the per-CPU
 * table copies are spaced this way to avoid false sharing. */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))

/* Mutex protects lists (only traversed in user context). */
static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
/* NOTE(review): down_trylock() != 0 only proves that *someone* holds
 * ipt_mutex, not that the caller does — this is a best-effort check. */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
70
/* Locking is simple: we assume at worst case there will be one packet
   in user context and one from bottom halves (or soft irq if Alexey's
   softnet patch was applied).

   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   To be cache friendly on SMP, we arrange them like so:
   [ n-entries ]
   ... cache-align padding ...
   [ n-entries ]

   Hence the start of any table is given by get_table() below.  */
87
/* The table itself */
struct ipt_table_info
{
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows: byte offsets into `entries' of
	 * the first rule and of the chain-end policy rule, per hook. */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU, each copy SMP_ALIGN(size)
	 * bytes apart (see TABLE_OFFSET). */
	char entries[0] ____cacheline_aligned;
};
105
/* Registries of targets, matches and tables; all guarded by ipt_mutex
 * and only walked in user context. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Accumulate b bytes / p packets into counter pair c.
 * NOTE: c is evaluated twice — do not pass expressions with side
 * effects. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset of CPU p's private copy within a table's entries blob. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif

#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
122
123/* Returns whether matches rule or not. */
124static inline int
125ip_packet_match(const struct iphdr *ip,
126 const char *indev,
127 const char *physindev,
128 const char *outdev,
129 const char *physoutdev,
130 const struct ipt_ip *ipinfo,
131 int isfrag)
132{
133 size_t i;
Bart De Schuymer41426492002-09-18 18:18:25 +0000134 unsigned long ret, ret2;
Bart De Schuymerd86fb4e2002-08-24 09:23:41 +0000135
136#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
137
138 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
139 IPT_INV_SRCIP)
140 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
141 IPT_INV_DSTIP)) {
142 dprintf("Source or dest mismatch.\n");
143
144 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
145 NIPQUAD(ip->saddr),
146 NIPQUAD(ipinfo->smsk.s_addr),
147 NIPQUAD(ipinfo->src.s_addr),
148 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
149 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
150 NIPQUAD(ip->daddr),
151 NIPQUAD(ipinfo->dmsk.s_addr),
152 NIPQUAD(ipinfo->dst.s_addr),
153 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
154 return 0;
155 }
156
157 /* Look for ifname matches; this should unroll nicely. */
158 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
159 ret |= (((const unsigned long *)indev)[i]
160 ^ ((const unsigned long *)ipinfo->iniface)[i])
161 & ((const unsigned long *)ipinfo->iniface_mask)[i];
162 }
163
164 for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
165 ret2 |= (((const unsigned long *)physindev)[i]
166 ^ ((const unsigned long *)ipinfo->iniface)[i])
167 & ((const unsigned long *)ipinfo->iniface_mask)[i];
168 }
169
170 if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_IN)) {
171 dprintf("VIA in mismatch (%s vs %s).%s\n",
172 indev, ipinfo->iniface,
173 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
174 return 0;
175 }
176
177 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
178 ret |= (((const unsigned long *)outdev)[i]
179 ^ ((const unsigned long *)ipinfo->outiface)[i])
180 & ((const unsigned long *)ipinfo->outiface_mask)[i];
181 }
182
183 for (i = 0, ret2 = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
184 ret2 |= (((const unsigned long *)physoutdev)[i]
185 ^ ((const unsigned long *)ipinfo->outiface)[i])
186 & ((const unsigned long *)ipinfo->outiface_mask)[i];
187 }
188
189 if (FWINV(ret != 0 && ret2 != 0, IPT_INV_VIA_OUT)) {
190 dprintf("VIA out mismatch (%s vs %s).%s\n",
191 outdev, ipinfo->outiface,
192 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
193 return 0;
194 }
195
196 /* Check specific protocol */
197 if (ipinfo->proto
198 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
199 dprintf("Packet protocol %hi does not match %hi.%s\n",
200 ip->protocol, ipinfo->proto,
201 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
202 return 0;
203 }
204
205 /* If we have a fragment rule but the packet is not a fragment
206 * then we return zero */
207 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
208 dprintf("Fragment rule but not fragment.%s\n",
209 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
210 return 0;
211 }
212
213 return 1;
214}
215
216static inline int
217ip_checkentry(const struct ipt_ip *ip)
218{
219 if (ip->flags & ~IPT_F_MASK) {
220 duprintf("Unknown flag bits set: %08X\n",
221 ip->flags & ~IPT_F_MASK);
222 return 0;
223 }
224 if (ip->invflags & ~IPT_INV_MASK) {
225 duprintf("Unknown invflag bits set: %08X\n",
226 ip->invflags & ~IPT_INV_MASK);
227 return 0;
228 }
229 return 1;
230}
231
/* Target of last resort: logs the error string carried in targinfo
 * (rate-limited) and drops the packet.  Registered as the "ERROR"
 * target — presumably hit only for malformed/unresolvable rules;
 * TODO(review): confirm against the registration site. */
static unsigned int
ipt_error(struct sk_buff **pskb,
	  unsigned int hooknum,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}
245
246static inline
247int do_match(struct ipt_entry_match *m,
248 const struct sk_buff *skb,
249 const struct net_device *in,
250 const struct net_device *out,
251 int offset,
252 const void *hdr,
253 u_int16_t datalen,
254 int *hotdrop)
255{
256 /* Stop iteration if it doesn't match */
257 if (!m->u.kernel.match->match(skb, in, out, m->data,
258 offset, hdr, datalen, hotdrop))
259 return 1;
260 else
261 return 0;
262}
263
/* Translate a byte offset within a table's entries blob into an
 * entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)((char *)base + offset);
}
269
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* Walks this CPU's private copy of `table' for one packet, starting
 * at the entry point for `hook'.  `e' is the current rule; `back' is
 * the rule to resume at when a user-defined chain returns, with the
 * stack of return addresses threaded through the entries' comefrom
 * fields.  Runs under the table's read lock (BH disabled). */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] = { 0 };
	u_int16_t offset;
	struct iphdr *ip;
	void *protohdr;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const char *physindev, *physoutdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	protohdr = (u_int32_t *)ip + ip->ihl;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* Bridge-netfilter: physical ports, when the skb was bridged. */
	physindev = (*pskb)->physindev ? (*pskb)->physindev->name : nulldevname;
	physoutdev = (*pskb)->physoutdev ? (*pskb)->physoutdev->name : nulldevname;

	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet. All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports). If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	table_base = (void *)table->private->entries
		+ TABLE_OFFSET(table->private, smp_processor_id());
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check noone else using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		(*pskb)->nfcache |= e->nfcache;
		if (ip_packet_match(ip, indev, physindev, outdev, physoutdev,
				    &e->ip, offset)) {
			struct ipt_entry_target *t;

			/* Any extended match saying "no" skips the rule. */
			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, protohdr,
					      datalen, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						/* Absolute verdict, encoded
						 * as -(verdict) - 1. */
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v
				    != (void *)e + e->next_offset) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     hook,
								     in, out,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				protohdr = (u_int32_t *)ip + ip->ihl;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}
432
433/* If it succeeds, returns element and locks mutex */
434static inline void *
435find_inlist_lock_noload(struct list_head *head,
436 const char *name,
437 int *error,
438 struct semaphore *mutex)
439{
440 void *ret;
441
442#if 0
443 duprintf("find_inlist: searching for `%s' in %s.\n",
444 name, head == &ipt_target ? "ipt_target"
445 : head == &ipt_match ? "ipt_match"
446 : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
447#endif
448
449 *error = down_interruptible(mutex);
450 if (*error != 0)
451 return NULL;
452
453 ret = list_named_find(head, name);
454 if (!ret) {
455 *error = -ENOENT;
456 up(mutex);
457 }
458 return ret;
459}
460
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
/* As find_inlist_lock_noload(), but on a miss asks kmod to load the
 * module named "<prefix><name>" and then retries the lookup once.
 * Same locking contract: mutex held on success, released on failure. */
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *ret;

	ret = find_inlist_lock_noload(head, name, error, mutex);
	if (!ret) {
		/* VLA sized exactly for prefix+name+NUL; name comes
		 * from a fixed-size kernel copy, prefix is a literal. */
		char modulename[IPT_FUNCTION_MAXNAMELEN + strlen(prefix) + 1];
		strcpy(modulename, prefix);
		strcat(modulename, name);
		duprintf("find_inlist: loading `%s'.\n", modulename);
		request_module(modulename);
		ret = find_inlist_lock_noload(head, name, error, mutex);
	}

	return ret;
}
#endif
486
/* Find a registered table by name, auto-loading "iptable_<name>" if
 * needed.  Mutex held on success (see find_inlist_lock). */
static inline struct ipt_table *
find_table_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
}
492
/* Find a registered match by name, auto-loading "ipt_<name>" if
 * needed.  Mutex held on success (see find_inlist_lock). */
static inline struct ipt_match *
find_match_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
}
498
/* Find a registered target by name, auto-loading "ipt_<name>" if
 * needed.  Mutex held on success (see find_inlist_lock). */
static inline struct ipt_target *
find_target_lock(const char *name, int *error, struct semaphore *mutex)
{
	return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
}
504
505/* All zeroes == unconditional rule. */
506static inline int
507unconditional(const struct ipt_ip *ip)
508{
509 unsigned int i;
510
511 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
512 if (((__u32 *)ip)[i])
513 return 0;
514
515 return 1;
516}
517
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
/* Iterative depth-first walk of the ruleset, one pass per valid hook.
 * Each entry's counters.pcnt is borrowed as a temporary back pointer
 * (reset to 0 on the way out), and bit NF_IP_NUMHOOKS of comefrom
 * marks "on the current path" for loop detection. */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(newinfo->entries + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			/* Already on the current path: a loop. */
			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(newinfo->entries + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(newinfo->entries + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(newinfo->entries + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
613
614static inline int
615cleanup_match(struct ipt_entry_match *m, unsigned int *i)
616{
617 if (i && (*i)-- == 0)
618 return 1;
619
620 if (m->u.kernel.match->destroy)
621 m->u.kernel.match->destroy(m->data,
622 m->u.match_size - sizeof(*m));
623
624 if (m->u.kernel.match->me)
625 __MOD_DEC_USE_COUNT(m->u.kernel.match->me);
626
627 return 0;
628}
629
/* Validate a standard target: exact target_size, and a verdict that
 * is either a jump offset inside the table or a permitted negative
 * special value.  Returns 1 if valid, 0 otherwise. */
static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	/* Non-negative verdict == jump offset: must fit in the table. */
	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	/* Negative verdict: must encode RETURN or an NF verdict. */
	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}
659
/* Bind one rule match to its kernel implementation (auto-loading
 * "ipt_<name>" if necessary), pin the implementing module, and run
 * the match's own checkentry hook.  Increments *i for each match
 * successfully bound so the caller can unwind on later failure.
 * Returns 0 or a negative errno. */
static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	int ret;
	struct ipt_match *match;

	match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
	if (!match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return ret;
	}
	/* Hold the module while a rule references this match. */
	if (match->me)
		__MOD_INC_USE_COUNT(match->me);
	m->u.kernel.match = match;
	up(&ipt_mutex);

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		/* checkentry rejected the rule: drop our reference. */
		if (m->u.kernel.match->me)
			__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}
694
695static struct ipt_target ipt_standard_target;
696
697static inline int
698check_entry(struct ipt_entry *e, const char *name, unsigned int size,
699 unsigned int *i)
700{
701 struct ipt_entry_target *t;
702 struct ipt_target *target;
703 int ret;
704 unsigned int j;
705
706 if (!ip_checkentry(&e->ip)) {
707 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
708 return -EINVAL;
709 }
710
711 j = 0;
712 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
713 if (ret != 0)
714 goto cleanup_matches;
715
716 t = ipt_get_target(e);
717 target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
718 if (!target) {
719 duprintf("check_entry: `%s' not found\n", t->u.user.name);
720 goto cleanup_matches;
721 }
722 if (target->me)
723 __MOD_INC_USE_COUNT(target->me);
724 t->u.kernel.target = target;
725 up(&ipt_mutex);
726
727 if (t->u.kernel.target == &ipt_standard_target) {
728 if (!standard_check(t, size)) {
729 ret = -EINVAL;
730 goto cleanup_matches;
731 }
732 } else if (t->u.kernel.target->checkentry
733 && !t->u.kernel.target->checkentry(name, e, t->data,
734 t->u.target_size
735 - sizeof(*t),
736 e->comefrom)) {
737 if (t->u.kernel.target->me)
738 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
739 duprintf("ip_tables: check failed for `%s'.\n",
740 t->u.kernel.target->name);
741 ret = -EINVAL;
742 goto cleanup_matches;
743 }
744
745 (*i)++;
746 return 0;
747
748 cleanup_matches:
749 IPT_MATCH_ITERATE(e, cleanup_match, &j);
750 return ret;
751}
752
/* Structural check of one rule during table translation: alignment,
 * minimum size, and bounds.  Also records which hook entry points and
 * underflows fall exactly on this rule's offset, and clears the
 * fields (counters, comefrom) that the kernel owns.  Increments *i
 * (the running entry count).  Returns 0 or -EINVAL. */
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	/* Entry must be aligned and leave room for at least a header
	 * before the end of the blob. */
	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* next_offset must cover at least header + a target. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}
795
796static inline int
797cleanup_entry(struct ipt_entry *e, unsigned int *i)
798{
799 struct ipt_entry_target *t;
800
801 if (i && (*i)-- == 0)
802 return 1;
803
804 /* Cleanup all matches */
805 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
806 t = ipt_get_target(e);
807 if (t->u.kernel.target->destroy)
808 t->u.kernel.target->destroy(t->data,
809 t->u.target_size - sizeof(*t));
810 if (t->u.kernel.target->me)
811 __MOD_DEC_USE_COUNT(t->u.kernel.target->me);
812
813 return 0;
814}
815
/* Checks and translates the user-supplied table segment (held in
   newinfo) */
/* Pipeline: (1) structural pass over every entry, recording hook
 * entry points/underflows; (2) verify the counts and that all valid
 * hooks were found; (3) loop-check with mark_source_chains; (4) full
 * semantic check (binds matches/targets, taking module refs);
 * (5) replicate CPU 0's copy for every other CPU.
 * Returns 0 or a negative errno; on check failure all references
 * taken so far are released. */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		/* Unwind only the i entries that passed check_entry. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for (i = 1; i < NR_CPUS; i++) {
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return ret;
}
897
/* Atomically swap a table's ruleset for newinfo under the table write
 * lock.  Fails with -EAGAIN (returning NULL) if the caller's counter
 * count no longer matches the live table (someone replaced it in the
 * meantime).  On success returns the old info, which the caller must
 * drain and free. */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		/* Mark every per-CPU copy "unused" for the comefrom
		 * sanity checks in ipt_do_table(). */
		struct ipt_entry *table_base;
		unsigned int i;

		for (i = 0; i < NR_CPUS; i++) {
			table_base =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, i);

			table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}
938
/* Gets counters. */
/* IPT_ENTRY_ITERATE callback: fold one entry's byte/packet counters
 * into total[*i], then advance the index.  (ADD_COUNTER evaluates its
 * first argument twice, so the index increment stays separate.) */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}
950
951static void
952get_counters(const struct ipt_table_info *t,
953 struct ipt_counters counters[])
954{
955 unsigned int cpu;
956 unsigned int i;
957
958 for (cpu = 0; cpu < NR_CPUS; cpu++) {
959 i = 0;
960 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
961 t->size,
962 add_entry_to_counter,
963 counters,
964 &i);
965 }
966}
967
/* Copy a table's ruleset out to userspace: an atomic counter snapshot
 * (summed over all CPUs), then CPU 0's entries blob, then a fix-up
 * pass that patches each copied entry's counters and rewrites each
 * match/target name from the kernel-resolved name.  Returns 0 or a
 * negative errno. */
static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc(countersize);

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	memset(counters, 0, countersize);
	/* Write lock: excludes the packet path so the sum is atomic. */
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* ... then copy entire thing from CPU 0... */
	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(table->private->entries + off);
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Rewrite each match's user-visible name. */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Rewrite the target's user-visible name. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1046
/* IPT_SO_GET_ENTRIES handler: look up the named table and copy its
 * ruleset to userspace.  `entries' is the kernel copy of the request
 * header; `uptr' is the userspace destination (uptr->entrytable is
 * pure pointer arithmetic on a flexible array member — no user-memory
 * dereference here).  Returns 0 or a negative errno. */
static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name, &ret, &ipt_mutex);
	if (t) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		/* Userspace must know the exact current size. */
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		up(&ipt_mutex);
	} else
		duprintf("get_entries: Can't find %s!\n",
			 entries->name);

	return ret;
}
1074
/* IPT_SO_SET_REPLACE handler: install a complete new ruleset.
 * Copies the blob from userspace, translates/validates it, swaps it
 * in under the table lock, adjusts the table module's refcount based
 * on whether user rules are present, then drains the old table's
 * counters back to userspace and frees it.
 * NOTE(review): tmp.num_counters * sizeof(struct ipt_counters) is an
 * unchecked userspace-controlled multiplication — relies on vmalloc
 * failing for absurd sizes; verify against later hardened versions. */
static int
do_replace(void *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	/* One aligned copy of the ruleset per CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(tmp.size) * NR_CPUS);
	if (!newinfo)
		return -ENOMEM;

	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}
	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = find_table_lock(tmp.name, &ret, &ipt_mutex);
	if (!t)
		goto free_newinfo_counters_untrans;

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto free_newinfo_counters_untrans_unlock;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto free_newinfo_counters_untrans_unlock;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		oldinfo->number, oldinfo->initial_entries, newinfo->number);
	/* Pin the table module while user-added rules exist; unpin
	 * when the ruleset shrinks back to the built-in entries. */
	if (t->me && (oldinfo->number <= oldinfo->initial_entries) &&
 	    (newinfo->number > oldinfo->initial_entries))
		__MOD_INC_USE_COUNT(t->me);
	else if (t->me && (oldinfo->number > oldinfo->initial_entries) &&
	 	 (newinfo->number <= oldinfo->initial_entries))
		__MOD_DEC_USE_COUNT(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
	vfree(oldinfo);
	/* Silent error: too late now. */
	copy_to_user(tmp.counters, counters,
		     sizeof(struct ipt_counters) * tmp.num_counters);
	vfree(counters);
	up(&ipt_mutex);
	return 0;

 free_newinfo_counters_untrans_unlock:
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	vfree(newinfo);
	return ret;
}
1169
/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
/* IPT_ENTRY_ITERATE callback used by do_add_counters(): folds the (*i)-th
 * user-supplied byte/packet counter pair from `addme' into entry `e' via
 * ADD_COUNTER, then advances *i so the next call consumes the next pair.
 * Always returns 0, so the iteration never stops early. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct ipt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}
1191
1192static int
1193do_add_counters(void *user, unsigned int len)
1194{
1195 unsigned int i;
1196 struct ipt_counters_info tmp, *paddc;
1197 struct ipt_table *t;
1198 int ret;
1199
1200 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1201 return -EFAULT;
1202
1203 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1204 return -EINVAL;
1205
1206 paddc = vmalloc(len);
1207 if (!paddc)
1208 return -ENOMEM;
1209
1210 if (copy_from_user(paddc, user, len) != 0) {
1211 ret = -EFAULT;
1212 goto free;
1213 }
1214
1215 t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1216 if (!t)
1217 goto free;
1218
1219 write_lock_bh(&t->lock);
1220 if (t->private->number != paddc->num_counters) {
1221 ret = -EINVAL;
1222 goto unlock_up_free;
1223 }
1224
1225 i = 0;
1226 IPT_ENTRY_ITERATE(t->private->entries,
1227 t->private->size,
1228 add_counter_to_entry,
1229 paddc->counters,
1230 &i);
1231 unlock_up_free:
1232 write_unlock_bh(&t->lock);
1233 up(&ipt_mutex);
1234 free:
1235 vfree(paddc);
1236
1237 return ret;
1238}
1239
1240static int
1241do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1242{
1243 int ret;
1244
1245 if (!capable(CAP_NET_ADMIN))
1246 return -EPERM;
1247
1248 switch (cmd) {
1249 case IPT_SO_SET_REPLACE:
1250 ret = do_replace(user, len);
1251 break;
1252
1253 case IPT_SO_SET_ADD_COUNTERS:
1254 ret = do_add_counters(user, len);
1255 break;
1256
1257 default:
1258 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1259 ret = -EINVAL;
1260 }
1261
1262 return ret;
1263}
1264
/* getsockopt() entry point: dispatch IPT_SO_GET_* requests.
 * Reading table state reveals firewall policy, so CAP_NET_ADMIN is
 * required here as well.  *len is both the user buffer size on entry
 * and validated against the expected payload size per command. */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		/* Return hook offsets, entry count and blob size for the
		   table whose name is passed in the user buffer. */
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* User space need not NUL-terminate the name. */
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
		t = find_table_lock(name, &ret, &ipt_mutex);
		if (t) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			strcpy(info.name, name);

			/* *len == sizeof(info), checked above. */
			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;

			/* find_table_lock() left ipt_mutex held. */
			up(&ipt_mutex);
		}
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		/* Copy out the table's rule blob; caller declares the
		   table name and expected blob size in `get'. */
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			/* Buffer must match header + declared blob exactly. */
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
1337
1338/* Registration hooks for targets. */
1339int
1340ipt_register_target(struct ipt_target *target)
1341{
1342 int ret;
1343
1344 MOD_INC_USE_COUNT;
1345 ret = down_interruptible(&ipt_mutex);
1346 if (ret != 0) {
1347 MOD_DEC_USE_COUNT;
1348 return ret;
1349 }
1350 if (!list_named_insert(&ipt_target, target)) {
1351 duprintf("ipt_register_target: `%s' already in list!\n",
1352 target->name);
1353 ret = -EINVAL;
1354 MOD_DEC_USE_COUNT;
1355 }
1356 up(&ipt_mutex);
1357 return ret;
1358}
1359
1360void
1361ipt_unregister_target(struct ipt_target *target)
1362{
1363 down(&ipt_mutex);
1364 LIST_DELETE(&ipt_target, target);
1365 up(&ipt_mutex);
1366 MOD_DEC_USE_COUNT;
1367}
1368
1369int
1370ipt_register_match(struct ipt_match *match)
1371{
1372 int ret;
1373
1374 MOD_INC_USE_COUNT;
1375 ret = down_interruptible(&ipt_mutex);
1376 if (ret != 0) {
1377 MOD_DEC_USE_COUNT;
1378 return ret;
1379 }
1380 if (!list_named_insert(&ipt_match, match)) {
1381 duprintf("ipt_register_match: `%s' already in list!\n",
1382 match->name);
1383 MOD_DEC_USE_COUNT;
1384 ret = -EINVAL;
1385 }
1386 up(&ipt_mutex);
1387
1388 return ret;
1389}
1390
1391void
1392ipt_unregister_match(struct ipt_match *match)
1393{
1394 down(&ipt_mutex);
1395 LIST_DELETE(&ipt_match, match);
1396 up(&ipt_mutex);
1397 MOD_DEC_USE_COUNT;
1398}
1399
1400int ipt_register_table(struct ipt_table *table)
1401{
1402 int ret;
1403 struct ipt_table_info *newinfo;
1404 static struct ipt_table_info bootstrap
1405 = { 0, 0, 0, { 0 }, { 0 }, { } };
1406
1407 MOD_INC_USE_COUNT;
1408 newinfo = vmalloc(sizeof(struct ipt_table_info)
1409 + SMP_ALIGN(table->table->size) * NR_CPUS);
1410 if (!newinfo) {
1411 ret = -ENOMEM;
1412 MOD_DEC_USE_COUNT;
1413 return ret;
1414 }
1415 memcpy(newinfo->entries, table->table->entries, table->table->size);
1416
1417 ret = translate_table(table->name, table->valid_hooks,
1418 newinfo, table->table->size,
1419 table->table->num_entries,
1420 table->table->hook_entry,
1421 table->table->underflow);
1422 if (ret != 0) {
1423 vfree(newinfo);
1424 MOD_DEC_USE_COUNT;
1425 return ret;
1426 }
1427
1428 ret = down_interruptible(&ipt_mutex);
1429 if (ret != 0) {
1430 vfree(newinfo);
1431 MOD_DEC_USE_COUNT;
1432 return ret;
1433 }
1434
1435 /* Don't autoload: we'd eat our tail... */
1436 if (list_named_find(&ipt_tables, table->name)) {
1437 ret = -EEXIST;
1438 goto free_unlock;
1439 }
1440
1441 /* Simplifies replace_table code. */
1442 table->private = &bootstrap;
1443 if (!replace_table(table, 0, newinfo, &ret))
1444 goto free_unlock;
1445
1446 duprintf("table->private->number = %u\n",
1447 table->private->number);
1448
1449 /* save number of initial entries */
1450 table->private->initial_entries = table->private->number;
1451
1452 table->lock = RW_LOCK_UNLOCKED;
1453 list_prepend(&ipt_tables, table);
1454
1455 unlock:
1456 up(&ipt_mutex);
1457 return ret;
1458
1459 free_unlock:
1460 vfree(newinfo);
1461 MOD_DEC_USE_COUNT;
1462 goto unlock;
1463}
1464
1465void ipt_unregister_table(struct ipt_table *table)
1466{
1467 down(&ipt_mutex);
1468 LIST_DELETE(&ipt_tables, table);
1469 up(&ipt_mutex);
1470
1471 /* Decrease module usage counts and free resources */
1472 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1473 cleanup_entry, NULL);
1474 vfree(table->private);
1475 MOD_DEC_USE_COUNT;
1476}
1477
/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int inside = (port >= min) && (port <= max);

	/* XOR flips the verdict when the rule carries the '!' flag. */
	return inside ^ invert;
}
1487
1488static int
1489tcp_find_option(u_int8_t option,
1490 const struct tcphdr *tcp,
1491 u_int16_t datalen,
1492 int invert,
1493 int *hotdrop)
1494{
1495 unsigned int i = sizeof(struct tcphdr);
1496 const u_int8_t *opt = (u_int8_t *)tcp;
1497
1498 duprintf("tcp_match: finding option\n");
1499 /* If we don't have the whole header, drop packet. */
1500 if (tcp->doff * 4 > datalen) {
1501 *hotdrop = 1;
1502 return 0;
1503 }
1504
1505 while (i < tcp->doff * 4) {
1506 if (opt[i] == option) return !invert;
1507 if (opt[i] < 2) i++;
1508 else i += opt[i+1]?:1;
1509 }
1510
1511 return invert;
1512}
1513
/* Match function of the built-in "tcp" match.  `hdr'/`datalen' point at
 * the transport header data; `offset' is the IP fragment offset.
 * Returns 1 on match, 0 otherwise; sets *hotdrop to 1 for packets that
 * are malformed enough to warrant dropping outright. */
static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  const void *hdr,
	  u_int16_t datalen,
	  int *hotdrop)
{
	const struct tcphdr *tcp = hdr;
	const struct ipt_tcp *tcpinfo = matchinfo;

	/* To quote Alan:

	   Don't allow a fragment of TCP 8 bytes in. Nobody normal
	   causes this. Its a cracker trying to break in by doing a
	   flag overwrite to pass the direction checks.
	*/

	if (offset == 1) {
		/* offset==1 means the fragment starts 8 bytes into the
		   TCP header -- the overwrite attack described above. */
		duprintf("Dropping evil TCP offset=1 frag.\n");
		*hotdrop = 1;
		return 0;
	} else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* FIXME: Try tcp doff >> packet len against various stacks --RR */

/* Evaluates to `bool', flipped when the corresponding inversion flag
   is set in the rule. */
#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	/* Must not be a fragment.  All sub-checks (source port range,
	   destination port range, flag byte under mask, optional TCP
	   option scan) must agree, each individually invertible. */
	return !offset
		&& port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			      ntohs(tcp->source),
			      !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT))
		&& port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			      ntohs(tcp->dest),
			      !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT))
		/* Byte 13 of the TCP header is the flags byte. */
		&& FWINVTCP((((unsigned char *)tcp)[13]
			     & tcpinfo->flg_mask)
			    == tcpinfo->flg_cmp,
			    IPT_TCP_INV_FLAGS)
		&& (!tcpinfo->option
		    || tcp_find_option(tcpinfo->option, tcp, datalen,
				       tcpinfo->invflags
				       & IPT_TCP_INV_OPTION,
				       hotdrop));
}
1568
1569/* Called when user tries to insert an entry of this type. */
1570static int
1571tcp_checkentry(const char *tablename,
1572 const struct ipt_ip *ip,
1573 void *matchinfo,
1574 unsigned int matchsize,
1575 unsigned int hook_mask)
1576{
1577 const struct ipt_tcp *tcpinfo = matchinfo;
1578
1579 /* Must specify proto == TCP, and no unknown invflags */
1580 return ip->proto == IPPROTO_TCP
1581 && !(ip->invflags & IPT_INV_PROTO)
1582 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1583 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1584}
1585
1586static int
1587udp_match(const struct sk_buff *skb,
1588 const struct net_device *in,
1589 const struct net_device *out,
1590 const void *matchinfo,
1591 int offset,
1592 const void *hdr,
1593 u_int16_t datalen,
1594 int *hotdrop)
1595{
1596 const struct udphdr *udp = hdr;
1597 const struct ipt_udp *udpinfo = matchinfo;
1598
1599 if (offset == 0 && datalen < sizeof(struct udphdr)) {
1600 /* We've been asked to examine this packet, and we
1601 can't. Hence, no choice but to drop. */
1602 duprintf("Dropping evil UDP tinygram.\n");
1603 *hotdrop = 1;
1604 return 0;
1605 }
1606
1607 /* Must not be a fragment. */
1608 return !offset
1609 && port_match(udpinfo->spts[0], udpinfo->spts[1],
1610 ntohs(udp->source),
1611 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1612 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1613 ntohs(udp->dest),
1614 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1615}
1616
/* Called when user tries to insert an entry of this type. */
/* Sanity-check a "udp" match at rule-load time: the rule must pin the
 * protocol to UDP (non-inverted), carry a correctly-sized ipt_udp
 * payload, and use only known inversion flags.  Returns 1 if valid,
 * 0 (with a debug message) otherwise. */
static int
udp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchinfosize,
	       unsigned int hook_mask)
{
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must specify proto == UDP, and no unknown invflags */
	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
			 IPPROTO_UDP);
		return 0;
	}
	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
		duprintf("ipt_udp: matchsize %u != %u\n",
			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
		return 0;
	}
	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
		duprintf("ipt_udp: unknown flags %X\n",
			 udpinfo->invflags);
		return 0;
	}

	return 1;
}
1646
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit = (type == test_type)
		  && (code >= min_code)
		  && (code <= max_code);

	/* XOR flips the verdict when the rule carries the '!' flag. */
	return hit ^ invert;
}
1656
1657static int
1658icmp_match(const struct sk_buff *skb,
1659 const struct net_device *in,
1660 const struct net_device *out,
1661 const void *matchinfo,
1662 int offset,
1663 const void *hdr,
1664 u_int16_t datalen,
1665 int *hotdrop)
1666{
1667 const struct icmphdr *icmp = hdr;
1668 const struct ipt_icmp *icmpinfo = matchinfo;
1669
1670 if (offset == 0 && datalen < 2) {
1671 /* We've been asked to examine this packet, and we
1672 can't. Hence, no choice but to drop. */
1673 duprintf("Dropping evil ICMP tinygram.\n");
1674 *hotdrop = 1;
1675 return 0;
1676 }
1677
1678 /* Must not be a fragment. */
1679 return !offset
1680 && icmp_type_code_match(icmpinfo->type,
1681 icmpinfo->code[0],
1682 icmpinfo->code[1],
1683 icmp->type, icmp->code,
1684 !!(icmpinfo->invflags&IPT_ICMP_INV));
1685}
1686
1687/* Called when user tries to insert an entry of this type. */
1688static int
1689icmp_checkentry(const char *tablename,
1690 const struct ipt_ip *ip,
1691 void *matchinfo,
1692 unsigned int matchsize,
1693 unsigned int hook_mask)
1694{
1695 const struct ipt_icmp *icmpinfo = matchinfo;
1696
1697 /* Must specify proto == ICMP, and no unknown invflags */
1698 return ip->proto == IPPROTO_ICMP
1699 && !(ip->invflags & IPT_INV_PROTO)
1700 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1701 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1702}
1703
/* The built-in targets: standard (NULL) and error. */
/* Positional initializers: { list links }, name, target fn, checkentry,
   destroy.  The standard target's NULL target function presumably marks
   it for special handling in the traversal code -- not visible here. */
static struct ipt_target ipt_standard_target
= { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL };
static struct ipt_target ipt_error_target
= { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL };

/* sockopt registration: PF_INET, set range [IPT_BASE_CTL,
   IPT_SO_SET_MAX] -> do_ipt_set_ctl, get range [IPT_BASE_CTL,
   IPT_SO_GET_MAX] -> do_ipt_get_ctl. */
static struct nf_sockopt_ops ipt_sockopts
= { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl,
    IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL };

/* The built-in protocol matches: { list links }, name, match fn,
   checkentry fn, destroy (none needed). */
static struct ipt_match tcp_matchstruct
= { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL };
static struct ipt_match udp_matchstruct
= { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL };
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
1720
1721#ifdef CONFIG_PROC_FS
1722static inline int print_name(const struct ipt_table *t,
1723 off_t start_offset, char *buffer, int length,
1724 off_t *pos, unsigned int *count)
1725{
1726 if ((*count)++ >= start_offset) {
1727 unsigned int namelen;
1728
1729 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1730 if (*pos + namelen > length) {
1731 /* Stop iterating */
1732 return 1;
1733 }
1734 *pos += namelen;
1735 }
1736 return 0;
1737}
1738
/* read handler for /proc/net/ip_tables_names: emit the registered
 * table names, one per line, via print_name().  Returns the number of
 * bytes written into `buffer' (0 if interrupted while waiting for the
 * mutex, i.e. "no data"). */
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	/* Hold the mutex so the table list cannot change mid-walk. */
	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start=(char *)((unsigned long)count-offset);
	return pos;
}
1756#endif /*CONFIG_PROC_FS*/
1757
/* Module init: seed the global lists with the built-in targets and
 * matches, register the get/setsockopt interface, and (with procfs)
 * create /proc/net/ip_tables_names.  Returns 0 or a negative errno,
 * undoing the sockopt registration if the proc entry cannot be made. */
static int __init init(void)
{
	int ret;

	/* Noone else will be downing sem now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;

		proc = proc_net_create("ip_tables_names", 0, ipt_get_tables);
		if (!proc) {
			/* Roll back the sockopt registration. */
			nf_unregister_sockopt(&ipt_sockopts);
			return -ENOMEM;
		}
		proc->owner = THIS_MODULE;
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}
1794
/* Module exit: unregister the sockopt interface and remove the proc
 * listing created by init(). */
static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	proc_net_remove("ip_tables_names");
#endif
}
1802
/* Public entry points for table modules (e.g. iptable_filter), for
 * match/target extension modules, and for the hook-time traversal
 * function ipt_do_table. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");