/*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 */
15#include <linux/config.h>
16#include <linux/cache.h>
17#include <linux/skbuff.h>
18#include <linux/kmod.h>
19#include <linux/vmalloc.h>
20#include <linux/netdevice.h>
21#include <linux/module.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/icmp.h>
25#include <net/ip.h>
26#include <asm/uaccess.h>
27#include <asm/semaphore.h>
28#include <linux/proc_fs.h>
29#include <linux/err.h>
30
31#include <linux/netfilter_ipv4/ip_tables.h>
32
33MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35MODULE_DESCRIPTION("IPv4 packet filter");
36
37/*#define DEBUG_IP_FIREWALL*/
38/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39/*#define DEBUG_IP_FIREWALL_USER*/
40
41#ifdef DEBUG_IP_FIREWALL
42#define dprintf(format, args...) printk(format , ## args)
43#else
44#define dprintf(format, args...)
45#endif
46
47#ifdef DEBUG_IP_FIREWALL_USER
48#define duprintf(format, args...) printk(format , ## args)
49#else
50#define duprintf(format, args...)
51#endif
52
53#ifdef CONFIG_NETFILTER_DEBUG
54#define IP_NF_ASSERT(x) \
55do { \
56 if (!(x)) \
57 printk("IP_NF_ASSERT: %s:%s:%u\n", \
58 __FUNCTION__, __FILE__, __LINE__); \
59} while(0)
60#else
61#define IP_NF_ASSERT(x)
62#endif
63#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64
65static DECLARE_MUTEX(ipt_mutex);
66
67/* Must have mutex */
68#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/listhelp.h>
71
72#if 0
73/* All the better to debug you with... */
74#define static
75#define inline
76#endif
77
78/*
79 We keep a set of rules for each CPU, so we can avoid write-locking
80 them in the softirq when updating the counters and therefore
81 only need to read-lock in the softirq; doing a write_lock_bh() in user
82 context stops packets coming through and allows user context to read
83 the counters or update the rules.
84
85 To be cache friendly on SMP, we arrange them like so:
86 [ n-entries ]
87 ... cache-align padding ...
88 [ n-entries ]
89
90 Hence the start of any table is given by get_table() below. */
91
92/* The table itself */
93struct ipt_table_info
94{
95 /* Size per table */
96 unsigned int size;
97 /* Number of entries: FIXME. --RR */
98 unsigned int number;
99 /* Initial number of entries. Needed for module usage count */
100 unsigned int initial_entries;
101
102 /* Entry points and underflows */
103 unsigned int hook_entry[NF_IP_NUMHOOKS];
104 unsigned int underflow[NF_IP_NUMHOOKS];
105
106 /* ipt_entry tables: one per CPU */
107 char entries[0] ____cacheline_aligned;
108};
109
110static LIST_HEAD(ipt_target);
111static LIST_HEAD(ipt_match);
112static LIST_HEAD(ipt_tables);
113#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
114
115#ifdef CONFIG_SMP
116#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
117#else
118#define TABLE_OFFSET(t,p) 0
119#endif
120
121#if 0
122#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125#endif
126
/* Returns whether matches rule or not.
 *
 * Compares one packet's IP header (plus in/out device names) against a
 * rule's ipt_ip specification.  Returns 1 on match, 0 on mismatch.
 * Each test honours the rule's IPT_INV_* inversion flags via FWINV. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

	/* Evaluate `bool', flipping the answer when the rule carries the
	 * corresponding IPT_INV_* inversion flag. */
#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

	/* Source/destination address, each masked by the rule's netmask. */
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely.
	 * The IFNAMSIZ name buffers are compared one unsigned long at a
	 * time under the rule's per-byte interface mask; `ret' collects
	 * any differing masked bits. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	/* Same word-at-a-time comparison for the output interface. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol; proto == 0 means "any protocol". */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}
205
206static inline int
207ip_checkentry(const struct ipt_ip *ip)
208{
209 if (ip->flags & ~IPT_F_MASK) {
210 duprintf("Unknown flag bits set: %08X\n",
211 ip->flags & ~IPT_F_MASK);
212 return 0;
213 }
214 if (ip->invflags & ~IPT_INV_MASK) {
215 duprintf("Unknown invflag bits set: %08X\n",
216 ip->invflags & ~IPT_INV_MASK);
217 return 0;
218 }
219 return 1;
220}
221
222static unsigned int
223ipt_error(struct sk_buff **pskb,
224 const struct net_device *in,
225 const struct net_device *out,
226 unsigned int hooknum,
227 const void *targinfo,
228 void *userinfo)
229{
230 if (net_ratelimit())
231 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233 return NF_DROP;
234}
235
236static inline
237int do_match(struct ipt_entry_match *m,
238 const struct sk_buff *skb,
239 const struct net_device *in,
240 const struct net_device *out,
241 int offset,
242 int *hotdrop)
243{
244 /* Stop iteration if it doesn't match */
245 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246 return 1;
247 else
248 return 0;
249}
250
/* Entries live in a flat blob and are addressed by byte offset from
 * the table base; turn such an offset into an entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	struct ipt_entry *entry = base + offset;

	return entry;
}
256
/* Returns one of the generic firewall policies, like NF_ACCEPT.
 *
 * Main packet-processing entry point: walks TABLE's chain for HOOK
 * against the packet in *pskb, under the table's read lock, following
 * jumps and returns until a rule produces an absolute verdict (or a
 * match sets hotdrop). */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	/* Missing devices compare as all-zero names (word-aligned so the
	 * word-wise compare in ip_packet_match is safe). */
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	/* Rules are replicated per CPU (see struct ipt_table_info);
	 * select this CPU's copy. */
	table_base = (void *)table->private->entries
		+ TABLE_OFFSET(table->private, smp_processor_id());
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check noone else using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		(*pskb)->nfcache |= e->nfcache;
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			/* Non-zero from the iterator means some match
			 * extension rejected the packet. */
			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						/* Negative verdicts encode
						 * an NF_* code as -code-1. */
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v
				    != (void *)e + e->next_offset) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}
410
411/*
412 * These are weird, but module loading must not be done with mutex
413 * held (since they will register), and we have to have a single
414 * function to use try_then_request_module().
415 */
416
417/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
418static inline struct ipt_table *find_table_lock(const char *name)
419{
420 struct ipt_table *t;
421
422 if (down_interruptible(&ipt_mutex) != 0)
423 return ERR_PTR(-EINTR);
424
425 list_for_each_entry(t, &ipt_tables, list)
426 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
427 return t;
428 up(&ipt_mutex);
429 return NULL;
430}
431
432/* Find match, grabs ref. Returns ERR_PTR() on error. */
433static inline struct ipt_match *find_match(const char *name, u8 revision)
434{
435 struct ipt_match *m;
436 int err = 0;
437
438 if (down_interruptible(&ipt_mutex) != 0)
439 return ERR_PTR(-EINTR);
440
441 list_for_each_entry(m, &ipt_match, list) {
442 if (strcmp(m->name, name) == 0) {
443 if (m->revision == revision) {
444 if (try_module_get(m->me)) {
445 up(&ipt_mutex);
446 return m;
447 }
448 } else
449 err = -EPROTOTYPE; /* Found something. */
450 }
451 }
452 up(&ipt_mutex);
453 return ERR_PTR(err);
454}
455
456/* Find target, grabs ref. Returns ERR_PTR() on error. */
457static inline struct ipt_target *find_target(const char *name, u8 revision)
458{
459 struct ipt_target *t;
460 int err = 0;
461
462 if (down_interruptible(&ipt_mutex) != 0)
463 return ERR_PTR(-EINTR);
464
465 list_for_each_entry(t, &ipt_target, list) {
466 if (strcmp(t->name, name) == 0) {
467 if (t->revision == revision) {
468 if (try_module_get(t->me)) {
469 up(&ipt_mutex);
470 return t;
471 }
472 } else
473 err = -EPROTOTYPE; /* Found something. */
474 }
475 }
476 up(&ipt_mutex);
477 return ERR_PTR(err);
478}
479
480struct ipt_target *ipt_find_target(const char *name, u8 revision)
481{
482 struct ipt_target *target;
483
484 target = try_then_request_module(find_target(name, revision),
485 "ipt_%s", name);
486 if (IS_ERR(target) || !target)
487 return NULL;
488 return target;
489}
490
491static int match_revfn(const char *name, u8 revision, int *bestp)
492{
493 struct ipt_match *m;
494 int have_rev = 0;
495
496 list_for_each_entry(m, &ipt_match, list) {
497 if (strcmp(m->name, name) == 0) {
498 if (m->revision > *bestp)
499 *bestp = m->revision;
500 if (m->revision == revision)
501 have_rev = 1;
502 }
503 }
504 return have_rev;
505}
506
507static int target_revfn(const char *name, u8 revision, int *bestp)
508{
509 struct ipt_target *t;
510 int have_rev = 0;
511
512 list_for_each_entry(t, &ipt_target, list) {
513 if (strcmp(t->name, name) == 0) {
514 if (t->revision > *bestp)
515 *bestp = t->revision;
516 if (t->revision == revision)
517 have_rev = 1;
518 }
519 }
520 return have_rev;
521}
522
523/* Returns true or false (if no such extension at all) */
524static inline int find_revision(const char *name, u8 revision,
525 int (*revfn)(const char *, u8, int *),
526 int *err)
527{
528 int have_rev, best = -1;
529
530 if (down_interruptible(&ipt_mutex) != 0) {
531 *err = -EINTR;
532 return 1;
533 }
534 have_rev = revfn(name, revision, &best);
535 up(&ipt_mutex);
536
537 /* Nothing at all? Return 0 to try loading module. */
538 if (best == -1) {
539 *err = -ENOENT;
540 return 0;
541 }
542
543 *err = best;
544 if (!have_rev)
545 *err = -EPROTONOSUPPORT;
546 return 1;
547}
548
549
550/* All zeroes == unconditional rule. */
551static inline int
552unconditional(const struct ipt_ip *ip)
553{
554 unsigned int i;
555
556 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
557 if (((__u32 *)ip)[i])
558 return 0;
559
560 return 1;
561}
562
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom.
 *
 * Iterative depth-first walk over the (not yet live) ruleset in
 * newinfo.  counters.pcnt is borrowed as the back-pointer stack
 * (restored to 0 on the way out); bit NF_IP_NUMHOOKS of comefrom
 * marks "on the current walk" so revisiting it means a chain loop. */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(newinfo->entries + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			/* Already on this walk => loop in the ruleset. */
			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this entry: clear the
					 * "on current walk" marker. */
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					/* Pop the saved back pointer and
					 * restore pcnt for counter use. */
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(newinfo->entries + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(newinfo->entries + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(newinfo->entries + newpos);
				/* Save where we came from for the return
				 * backtracking above. */
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
658
659static inline int
660cleanup_match(struct ipt_entry_match *m, unsigned int *i)
661{
662 if (i && (*i)-- == 0)
663 return 1;
664
665 if (m->u.kernel.match->destroy)
666 m->u.kernel.match->destroy(m->data,
667 m->u.match_size - sizeof(*m));
668 module_put(m->u.kernel.match->me);
669 return 0;
670}
671
/* Sanity-check a standard target from userspace: exact (aligned)
 * struct size, and a verdict that is either a jump offset inside the
 * table or a bounded negative special value.  Returns 1 if OK. */
static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	/* Non-negative verdict = jump offset; must not point past the
	 * last place an entry could start.  (NOTE(review): the RHS is
	 * unsigned arithmetic; the verdict >= 0 guard runs first.) */
	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	/* Negative verdicts (NF_* codes / IPT_RETURN) are bounded too. */
	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}
701
/* Resolve and set up one match extension of a rule being loaded:
 * look the extension up by name/revision (auto-loading "ipt_<name>"
 * if absent), take a module reference, and run the extension's own
 * checkentry hook.  On success bumps *i, the count of fully set-up
 * matches (used by the caller to unwind on later failure).
 * Returns 0 or a negative errno. */
static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(find_match(m->u.user.name,
						   m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		/* checkentry rejected it: drop the reference taken by
		 * find_match() above. */
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}
733
734static struct ipt_target ipt_standard_target;
735
/* Full validation of one rule: its IP spec, all its matches, and its
 * target; resolves extensions and takes module references as it goes.
 * On any failure, the matches set up so far (counted in j) are torn
 * down before returning.  Bumps *i, the count of fully checked
 * entries, on success.  Returns 0 or a negative errno. */
static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	/* j counts matches successfully set up, so cleanup_matches can
	 * unwind exactly those. */
	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(find_target(t->u.user.name,
						     t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	/* The built-in standard target gets the generic verdict check;
	 * everything else gets its own checkentry hook. */
	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		/* Drop the reference taken by find_target() above. */
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}
790
/* First-pass validation of one entry in the user-supplied blob:
 * alignment and bounds, minimum size, and recording of any hook
 * entry-point / underflow offsets that land exactly on this entry.
 * Also resets the kernel-owned counters/comefrom fields.  Bumps *i,
 * the running entry count.  Returns 0 or -EINVAL. */
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* An entry must at least hold its own header plus one target. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}
833
/* Release everything one rule holds: every match (destroy hook +
 * module reference via cleanup_match), then the target's destroy
 * hook and module reference.  With a non-NULL counter it acts as a
 * bounded iterator callback, stopping (return 1) after *i entries;
 * with NULL it cleans unconditionally and returns 0. */
static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}
851
/* Checks and translates the user-supplied table segment (held in
   newinfo)
 *
 * Pipeline: size/offset validation of every entry, verification that
 * all valid hooks got entry points and underflows, loop detection,
 * per-entry extension checks, and finally replication of the checked
 * ruleset into every other CPU's slot.  Returns 0 or a negative
 * errno; on success newinfo's extensions hold module references. */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	/* The declared entry count must match what we actually walked. */
	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	/* Reject rulesets whose chains loop. */
	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		/* i = number of entries fully checked; unwind only those. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for (i = 1; i < num_possible_cpus(); i++) {
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return ret;
}
933
/* Publish NEWINFO as TABLE's live ruleset, under the table write lock
 * so packet processing never sees a half-swapped table.  Returns the
 * previous ipt_table_info (caller harvests its counters and frees it),
 * or NULL with *error = -EAGAIN if num_counters no longer matches the
 * installed rule count (i.e. the table changed under the caller). */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		struct ipt_entry *table_base;
		unsigned int i;

		/* Mark each CPU's copy "not in use" for the comefrom
		 * sanity checks in ipt_do_table(). */
		for (i = 0; i < num_possible_cpus(); i++) {
			table_base =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, i);

			table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	/* Carried across replacements for the module refcount logic
	 * in do_replace(). */
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}
974
975/* Gets counters. */
976static inline int
977add_entry_to_counter(const struct ipt_entry *e,
978 struct ipt_counters total[],
979 unsigned int *i)
980{
981 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
982
983 (*i)++;
984 return 0;
985}
986
987static void
988get_counters(const struct ipt_table_info *t,
989 struct ipt_counters counters[])
990{
991 unsigned int cpu;
992 unsigned int i;
993
994 for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
995 i = 0;
996 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
997 t->size,
998 add_entry_to_counter,
999 counters,
1000 &i);
1001 }
1002}
1003
/* Copy the live ruleset (CPU 0's copy) back to userspace, patching in
 * a consistent counter snapshot and restoring the user-visible
 * extension names that the kernel replaced with pointers at load
 * time.  Returns 0, -ENOMEM, or -EFAULT. */
static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc(countersize);

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	memset(counters, 0, countersize);
	/* Write lock keeps all CPUs from updating counters meanwhile. */
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* ... then copy entire thing from CPU 0... */
	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(table->private->entries + off);
		/* Overwrite the raw (per-CPU-0) counters with the summed
		 * snapshot for this rule. */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Restore each match's user-visible name (the in-kernel
		 * struct holds a kernel pointer in that union slot). */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Same fix-up for the target's name. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1082
/* GET_ENTRIES handler: look up the table named in *entries and copy
 * its ruleset to userspace, provided the size userspace claimed
 * matches the live ruleset size.  Returns 0 or a negative errno. */
static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name);
	if (t && !IS_ERR(t)) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		/* find_table_lock() took a module ref and left ipt_mutex
		 * held; release both. */
		module_put(t->me);
		up(&ipt_mutex);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}
1110
/* SO_SET_REPLACE handler: validate and install a complete new ruleset
 * from userspace, then hand the old rules' counters back.  `user'
 * holds a struct ipt_replace followed by tmp.size bytes of entries.
 * Returns 0 or a negative errno. */
static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	/* Room for one aligned copy of the ruleset per possible CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
	if (!newinfo)
		return -ENOMEM;

	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}
	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));

	/* Validate the ruleset; on success its extensions hold module
	 * references that cleanup_entry must release later. */
	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(find_table_lock(tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules
	 * (see the 2002-01-19 note in the file header: the table module
	 * is pinned while it holds more than its initial rules).  The
	 * first put drops find_table_lock()'s reference unless the new
	 * ruleset newly pins the module; the second drops the pin when
	 * the rule count falls back to the initial set. */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
	vfree(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	up(&ipt_mutex);
	return ret;

 put_module:
	module_put(t->me);
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	vfree(newinfo);
	return ret;
}
1209
/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK.
 *
 * IPT_ENTRY_ITERATE callback used by do_add_counters(): folds the
 * user-supplied counter pair at index *i into entry e's own counters,
 * then advances the index.  Always returns 0 so the iteration visits
 * every rule in the table.
 */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct ipt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	/* ADD_COUNTER argument order is (counters, bytes, packets). */
	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}
1231
1232static int
1233do_add_counters(void __user *user, unsigned int len)
1234{
1235 unsigned int i;
1236 struct ipt_counters_info tmp, *paddc;
1237 struct ipt_table *t;
1238 int ret = 0;
1239
1240 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1241 return -EFAULT;
1242
1243 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1244 return -EINVAL;
1245
1246 paddc = vmalloc(len);
1247 if (!paddc)
1248 return -ENOMEM;
1249
1250 if (copy_from_user(paddc, user, len) != 0) {
1251 ret = -EFAULT;
1252 goto free;
1253 }
1254
1255 t = find_table_lock(tmp.name);
1256 if (!t || IS_ERR(t)) {
1257 ret = t ? PTR_ERR(t) : -ENOENT;
1258 goto free;
1259 }
1260
1261 write_lock_bh(&t->lock);
1262 if (t->private->number != paddc->num_counters) {
1263 ret = -EINVAL;
1264 goto unlock_up_free;
1265 }
1266
1267 i = 0;
1268 IPT_ENTRY_ITERATE(t->private->entries,
1269 t->private->size,
1270 add_counter_to_entry,
1271 paddc->counters,
1272 &i);
1273 unlock_up_free:
1274 write_unlock_bh(&t->lock);
1275 up(&ipt_mutex);
1276 module_put(t->me);
1277 free:
1278 vfree(paddc);
1279
1280 return ret;
1281}
1282
1283static int
1284do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1285{
1286 int ret;
1287
1288 if (!capable(CAP_NET_ADMIN))
1289 return -EPERM;
1290
1291 switch (cmd) {
1292 case IPT_SO_SET_REPLACE:
1293 ret = do_replace(user, len);
1294 break;
1295
1296 case IPT_SO_SET_ADD_COUNTERS:
1297 ret = do_add_counters(user, len);
1298 break;
1299
1300 default:
1301 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1302 ret = -EINVAL;
1303 }
1304
1305 return ret;
1306}
1307
/* getsockopt() entry point for iptables: table metadata, full rule
 * dumps, and match/target revision queries.  *len is validated against
 * the size each command expects.  Returns 0 or a negative errno.
 */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	/* Return hook offsets, entry count and blob size for one table. */
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* Userspace may not have terminated the name. */
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		/* Autoload "iptable_<name>" if the table isn't present. */
		t = try_then_request_module(find_table_lock(name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			/* find_table_lock left us holding ipt_mutex and a
			 * module reference; drop both. */
			up(&ipt_mutex);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	/* Copy the whole rule blob of a table to userspace. */
	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	/* Report the highest supported revision of a match or target. */
	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int (*revfn)(const char *, u8, int *);

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			revfn = target_revfn;
		else
			revfn = match_revfn;

		/* find_revision fills ret; autoload "ipt_<name>" on miss. */
		try_then_request_module(find_revision(rev.name, rev.revision,
						      revfn, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
1408
1409/* Registration hooks for targets. */
1410int
1411ipt_register_target(struct ipt_target *target)
1412{
1413 int ret;
1414
1415 ret = down_interruptible(&ipt_mutex);
1416 if (ret != 0)
1417 return ret;
1418 list_add(&target->list, &ipt_target);
1419 up(&ipt_mutex);
1420 return ret;
1421}
1422
/* Remove a target extension from the global list.  Uses the
 * uninterruptible down(): unregistration must not fail. */
void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
}
1430
1431int
1432ipt_register_match(struct ipt_match *match)
1433{
1434 int ret;
1435
1436 ret = down_interruptible(&ipt_mutex);
1437 if (ret != 0)
1438 return ret;
1439
1440 list_add(&match->list, &ipt_match);
1441 up(&ipt_mutex);
1442
1443 return ret;
1444}
1445
/* Remove a match extension from the global list.  Uses the
 * uninterruptible down(): unregistration must not fail. */
void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
}
1453
/* Register a new table with its initial ruleset (repl).  Translates
 * and installs the rules, records the initial entry count (used for
 * module refcounting in do_replace) and links the table into the
 * global list.  Returns 0 or a negative errno.
 */
int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct ipt_table_info *newinfo;
	/* Empty placeholder so replace_table() always sees a valid
	 * old table; static because it must outlive this call. */
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };

	/* One SMP_ALIGNed copy of the rule blob per possible CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(repl->size) * num_possible_cpus());
	if (!newinfo)
		return -ENOMEM;

	memcpy(newinfo->entries, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	rwlock_init(&table->lock);
	list_prepend(&ipt_tables, table);

 unlock:
	up(&ipt_mutex);
	return ret;

 free_unlock:
	vfree(newinfo);
	goto unlock;
}
1512
/* Unlink a table from the global list, then release the rules'
 * match/target references and free the rule blob. */
void ipt_unregister_table(struct ipt_table *table)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
			  cleanup_entry, NULL);
	vfree(table->private);
}
1524
/* Returns 1 if the port is matched by the range, 0 otherwise.
 * `invert' flips the result (the IPT_*_INV_*PT flags). */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (port >= min) && (port <= max);

	return in_range ^ invert;
}
1534
1535static int
1536tcp_find_option(u_int8_t option,
1537 const struct sk_buff *skb,
1538 unsigned int optlen,
1539 int invert,
1540 int *hotdrop)
1541{
1542 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1543 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1544 unsigned int i;
1545
1546 duprintf("tcp_match: finding option\n");
1547
1548 if (!optlen)
1549 return invert;
1550
1551 /* If we don't have the whole header, drop packet. */
1552 op = skb_header_pointer(skb,
1553 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1554 optlen, _opt);
1555 if (op == NULL) {
1556 *hotdrop = 1;
1557 return 0;
1558 }
1559
1560 for (i = 0; i < optlen; ) {
1561 if (op[i] == option) return !invert;
1562 if (op[i] < 2) i++;
1563 else i += op[i+1]?:1;
1564 }
1565
1566 return invert;
1567}
1568
/* Match callback for "-p tcp": checks source/destination ports, TCP
 * flag bits and (optionally) a TCP option kind.  Returns 1 on match,
 * 0 otherwise; sets *hotdrop to force a drop of malformed packets. */
static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct tcphdr _tcph, *th;
	const struct ipt_tcp *tcpinfo = matchinfo;

	if (offset) {
		/* To quote Alan:

		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
		   causes this. Its a cracker trying to break in by doing a
		   flag overwrite to pass the direction checks.
		*/
		if (offset == 1) {
			duprintf("Dropping evil TCP offset=1 frag.\n");
			*hotdrop = 1;
		}
		/* Must not be a fragment. */
		return 0;
	}

/* XOR the comparison result with the corresponding user invert flag. */
#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			ntohs(th->source),
			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
		return 0;
	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			ntohs(th->dest),
			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
		return 0;
	/* Byte 13 of the TCP header holds the flag bits (FIN..CWR). */
	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
		      == tcpinfo->flg_cmp,
		      IPT_TCP_INV_FLAGS))
		return 0;
	if (tcpinfo->option) {
		/* Data offset smaller than the basic header: corrupt. */
		if (th->doff * 4 < sizeof(_tcph)) {
			*hotdrop = 1;
			return 0;
		}
		if (!tcp_find_option(tcpinfo->option, skb,
				     th->doff*4 - sizeof(_tcph),
				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
				     hotdrop))
			return 0;
	}
	return 1;
}
1632
1633/* Called when user tries to insert an entry of this type. */
1634static int
1635tcp_checkentry(const char *tablename,
1636 const struct ipt_ip *ip,
1637 void *matchinfo,
1638 unsigned int matchsize,
1639 unsigned int hook_mask)
1640{
1641 const struct ipt_tcp *tcpinfo = matchinfo;
1642
1643 /* Must specify proto == TCP, and no unknown invflags */
1644 return ip->proto == IPPROTO_TCP
1645 && !(ip->invflags & IPT_INV_PROTO)
1646 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1647 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1648}
1649
/* Match callback for "-p udp": checks source/destination port ranges.
 * Returns 1 on match, 0 otherwise; sets *hotdrop on truncated headers. */
static int
udp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct udphdr _udph, *uh;
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil UDP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* Both port ranges must match, each honouring its invert flag. */
	return port_match(udpinfo->spts[0], udpinfo->spts[1],
			  ntohs(uh->source),
			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
			      ntohs(uh->dest),
			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}
1682
/* Called when user tries to insert an entry of this type. */
/* Validate a "-p udp" rule; each rejection path logs the reason when
 * DEBUG_IP_FIREWALL_USER is enabled.  Returns 1 if acceptable. */
static int
udp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchinfosize,
	       unsigned int hook_mask)
{
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must specify proto == UDP, and no unknown invflags */
	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
			 IPPROTO_UDP);
		return 0;
	}
	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
		duprintf("ipt_udp: matchsize %u != %u\n",
			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
		return 0;
	}
	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
		duprintf("ipt_udp: unknown flags %X\n",
			 udpinfo->invflags);
		return 0;
	}

	return 1;
}
1712
/* Returns 1 if the type and code is matched by the range, 0 otherwise.
 * A test_type of 0xFF is the wildcard written by iptables when no
 * --icmp-type was given: it matches any type/code. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type
		       && code >= min_code && code <= max_code);

	return hit ^ invert;
}
1722
/* Match callback for "-p icmp": checks ICMP type and code range.
 * Returns 1 on match, 0 otherwise; sets *hotdrop on truncated headers. */
static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't. Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
1755
1756/* Called when user tries to insert an entry of this type. */
1757static int
1758icmp_checkentry(const char *tablename,
1759 const struct ipt_ip *ip,
1760 void *matchinfo,
1761 unsigned int matchsize,
1762 unsigned int hook_mask)
1763{
1764 const struct ipt_icmp *icmpinfo = matchinfo;
1765
1766 /* Must specify proto == ICMP, and no unknown invflags */
1767 return ip->proto == IPPROTO_ICMP
1768 && !(ip->invflags & IPT_INV_PROTO)
1769 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1770 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1771}
1772
/* The built-in targets: standard (NULL) and error. */
/* A NULL .target means the verdict is handled inline by the table
 * traversal code (ACCEPT/DROP/QUEUE/RETURN and jumps). */
static struct ipt_target ipt_standard_target = {
	.name = IPT_STANDARD_TARGET,
};

/* Placed by translate_table at the end of each chain; hitting it at
 * runtime means the ruleset is corrupt (handled by ipt_error). */
static struct ipt_target ipt_error_target = {
	.name = IPT_ERROR_TARGET,
	.target = ipt_error,
};
1782
/* Hook the IPT_SO_* commands into the IPv4 {get,set}sockopt paths. */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};
1792
/* Built-in protocol matches, registered in init(); these back the
 * iptables "-p tcp/udp/icmp" match options. */
static struct ipt_match tcp_matchstruct = {
	.name		= "tcp",
	.match		= &tcp_match,
	.checkentry	= &tcp_checkentry,
};

static struct ipt_match udp_matchstruct = {
	.name		= "udp",
	.match		= &udp_match,
	.checkentry	= &udp_checkentry,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};
1810
1811#ifdef CONFIG_PROC_FS
/* proc read helper: once the iteration has skipped `start_offset'
 * entries, append one entry's name (plus newline) to `buffer'.
 * Returns 1 to stop the surrounding LIST_FIND when the buffer fills.
 *
 * Relies on the name being the first field after the list_head in all
 * structs this is used with (tables, targets, matches).
 *
 * NOTE(review): the sprintf runs before the length check, so a name
 * can be written just past `length' before we decide to stop --
 * presumably fine because names are short and the proc page has
 * slack beyond `length', but worth confirming. */
static inline int print_name(const char *i,
			     off_t start_offset, char *buffer, int length,
			     off_t *pos, unsigned int *count)
{
	if ((*count)++ >= start_offset) {
		unsigned int namelen;

		namelen = sprintf(buffer + *pos, "%s\n",
				  i + sizeof(struct list_head));
		if (*pos + namelen > length) {
			/* Stop iterating */
			return 1;
		}
		*pos += namelen;
	}
	return 0;
}
1829
1830static inline int print_target(const struct ipt_target *t,
1831 off_t start_offset, char *buffer, int length,
1832 off_t *pos, unsigned int *count)
1833{
1834 if (t == &ipt_standard_target || t == &ipt_error_target)
1835 return 0;
1836 return print_name((char *)t, start_offset, buffer, length, pos, count);
1837}
1838
/* /proc/net/ip_tables_names read handler: one table name per line. */
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	/* On a signal, report nothing rather than block. */
	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start=(char *)((unsigned long)count-offset);
	return pos;
}
1856
1857static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1858{
1859 off_t pos = 0;
1860 unsigned int count = 0;
1861
1862 if (down_interruptible(&ipt_mutex) != 0)
1863 return 0;
1864
1865 LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1866 offset, buffer, length, &pos, &count);
1867
1868 up(&ipt_mutex);
1869
1870 *start = (char *)((unsigned long)count - offset);
1871 return pos;
1872}
1873
1874static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1875{
1876 off_t pos = 0;
1877 unsigned int count = 0;
1878
1879 if (down_interruptible(&ipt_mutex) != 0)
1880 return 0;
1881
1882 LIST_FIND(&ipt_match, print_name, void *,
1883 offset, buffer, length, &pos, &count);
1884
1885 up(&ipt_mutex);
1886
1887 *start = (char *)((unsigned long)count - offset);
1888 return pos;
1889}
1890
/* NULL-terminated table of /proc/net entries created by init() and
 * removed by fini() (and on init's error path). */
static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL} };
1896#endif /*CONFIG_PROC_FS*/
1897
/* Module init: publish the built-in targets/matches, hook the
 * sockopt interface, and create the /proc/net listing files.
 * Unwinds everything on failure. */
static int __init init(void)
{
	int ret;

	/* Noone else will be downing sem now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;
		int i;

		for (i = 0; ipt_proc_entry[i].name; i++) {
			proc = proc_net_create(ipt_proc_entry[i].name, 0,
					       ipt_proc_entry[i].get_info);
			if (!proc) {
				/* Unwind: remove the entries created so
				 * far, then the sockopt registration. */
				while (--i >= 0)
					proc_net_remove(ipt_proc_entry[i].name);
				nf_unregister_sockopt(&ipt_sockopts);
				return -ENOMEM;
			}
			proc->owner = THIS_MODULE;
		}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}
1940
1941static void __exit fini(void)
1942{
1943 nf_unregister_sockopt(&ipt_sockopts);
1944#ifdef CONFIG_PROC_FS
1945 {
1946 int i;
1947 for (i = 0; ipt_proc_entry[i].name; i++)
1948 proc_net_remove(ipt_proc_entry[i].name);
1949 }
1950#endif
1951}
1952
1953EXPORT_SYMBOL(ipt_register_table);
1954EXPORT_SYMBOL(ipt_unregister_table);
1955EXPORT_SYMBOL(ipt_register_match);
1956EXPORT_SYMBOL(ipt_unregister_match);
1957EXPORT_SYMBOL(ipt_do_table);
1958EXPORT_SYMBOL(ipt_register_target);
1959EXPORT_SYMBOL(ipt_unregister_target);
1960EXPORT_SYMBOL(ipt_find_target);
1961
1962module_init(init);
1963module_exit(fini);