br-nf-bds/linux2.5/net/core/netfilter.c - external/ebtables - Gitiles

 /* netfilter.c: look after the filters for various protocols.
  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
  *
  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
  * way.
  *
  * Rusty Russell (C)2000 -- This code is GPL.
  *
  * February 2000: Modified by James Morris to have 1 queue per protocol.
  * 15-Mar-2000:   Added NF_REPEAT --RR.
  */
 #include <linux/config.h>
 #include <linux/netfilter.h>
 #include <net/protocol.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/wait.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/if.h>
 #include <linux/netdevice.h>
 #include <linux/brlock.h>
 #include <linux/inetdevice.h>
 #include <net/sock.h>
 #include <net/route.h>
 #include <linux/ip.h>

 #define __KERNEL_SYSCALLS__
 #include <linux/unistd.h>

 /* In this code, we can be waiting indefinitely for userspace to
  * service a packet if a hook returns NF_QUEUE.  We could keep a count
  * of skbuffs queued for userspace, and not deregister a hook unless
  * this is zero, but that sucks.  Now, we simply check when the
  * packets come back: if the hook is gone, the packet is discarded. */
 #ifdef CONFIG_NETFILTER_DEBUG
 #define NFDEBUG(format, args...)  printk(format , ## args)
 #else
 #define NFDEBUG(format, args...)
 #endif

 /* Sockopts only registered and called from user context, so
    BR_NETPROTO_LOCK would be overkill.  Also, [gs]etsockopt calls may
    sleep. */
 static DECLARE_MUTEX(nf_sockopt_mutex);

 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 static LIST_HEAD(nf_sockopts);

 /*
  * A queue handler may be registered for each protocol.  Each is protected by
  * long term mutex.  The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
 static struct nf_queue_handler_t {
 	nf_queue_outfn_t outfn;
 	void *data;
 } queue_handler[NPROTO];

 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct list_head *i;

 	br_write_lock_bh(BR_NETPROTO_LOCK);
 	for (i = nf_hooks[reg->pf][reg->hooknum].next;
 	     i != &nf_hooks[reg->pf][reg->hooknum];
 	     i = i->next) {
 		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
 			break;
 	}
 	list_add(&reg->list, i->prev);
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
 	return 0;
 }

 void nf_unregister_hook(struct nf_hook_ops *reg)
 {
 	br_write_lock_bh(BR_NETPROTO_LOCK);
 	list_del(&reg->list);
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
 }

 /* Do exclusive ranges overlap? */
 static inline int overlap(int min1, int max1, int min2, int max2)
 {
 	return max1 > min2 && min1 < max2;
 }

 /* Functions to register sockopt ranges (exclusive). */
 int nf_register_sockopt(struct nf_sockopt_ops *reg)
 {
 	struct list_head *i;
 	int ret = 0;

 	if (down_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;

 	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
 		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
 		if (ops->pf == reg->pf
 		    && (overlap(ops->set_optmin, ops->set_optmax,
 				reg->set_optmin, reg->set_optmax)
 			|| overlap(ops->get_optmin, ops->get_optmax,
 				   reg->get_optmin, reg->get_optmax))) {
 			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
 				ops->set_optmin, ops->set_optmax,
 				ops->get_optmin, ops->get_optmax,
 				reg->set_optmin, reg->set_optmax,
 				reg->get_optmin, reg->get_optmax);
 			ret = -EBUSY;
 			goto out;
 		}
 	}

 	list_add(&reg->list, &nf_sockopts);
 out:
 	up(&nf_sockopt_mutex);
 	return ret;
 }

 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
 {
 	/* No point being interruptible: we're probably in cleanup_module() */
  restart:
 	down(&nf_sockopt_mutex);
 	if (reg->use != 0) {
 		/* To be woken by nf_sockopt call... */
 		/* FIXME: Stuart Young's name appears gratuitously. */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		reg->cleanup_task = current;
 		up(&nf_sockopt_mutex);
 		schedule();
 		goto restart;
 	}
 	list_del(&reg->list);
 	up(&nf_sockopt_mutex);
 }

 #ifdef CONFIG_NETFILTER_DEBUG
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
 #include <linux/netfilter_ipv4.h>

 static void debug_print_hooks_ip(unsigned int nf_debug)
 {
 	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
 		printk("PRE_ROUTING ");
 		nf_debug ^= (1 << NF_IP_PRE_ROUTING);
 	}
 	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
 		printk("LOCAL_IN ");
 		nf_debug ^= (1 << NF_IP_LOCAL_IN);
 	}
 	if (nf_debug & (1 << NF_IP_FORWARD)) {
 		printk("FORWARD ");
 		nf_debug ^= (1 << NF_IP_FORWARD);
 	}
 	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
 		printk("LOCAL_OUT ");
 		nf_debug ^= (1 << NF_IP_LOCAL_OUT);
 	}
 	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
 		printk("POST_ROUTING ");
 		nf_debug ^= (1 << NF_IP_POST_ROUTING);
 	}
 	if (nf_debug)
 		printk("Crap bits: 0x%04X", nf_debug);
 	printk("\n");
 }

 void nf_dump_skb(int pf, struct sk_buff *skb)
 {
 	printk("skb: pf=%i %s dev=%s len=%u\n",
 	       pf,
 	       skb->sk ? "(owned)" : "(unowned)",
 	       skb->dev ? skb->dev->name : "(no dev)",
 	       skb->len);
 	switch (pf) {
 	case PF_INET: {
 		const struct iphdr *ip = skb->nh.iph;
 		__u32 *opt = (__u32 *) (ip + 1);
 		int opti;
 		__u16 src_port = 0, dst_port = 0;

 		if (ip->protocol == IPPROTO_TCP
 		    || ip->protocol == IPPROTO_UDP) {
 			struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
 			src_port = ntohs(tcp->source);
 			dst_port = ntohs(tcp->dest);
 		}

 		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
 		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
 		       ip->protocol, NIPQUAD(ip->saddr),
 		       src_port, NIPQUAD(ip->daddr),
 		       dst_port,
 		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
 		       ntohs(ip->frag_off), ip->ttl);

 		for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
 			printk(" O=0x%8.8X", *opt++);
 		printk("\n");
 	}
 	}
 }

 void nf_debug_ip_local_deliver(struct sk_buff *skb)
 {
 	/* If it's a loopback packet, it must have come through
 	 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
 	 * NF_IP_LOCAL_IN.  Otherwise, must have gone through
 	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
 	if (!skb->dev) {
 		printk("ip_local_deliver: skb->dev is NULL.\n");
 	}
 	else if (strcmp(skb->dev->name, "lo") == 0) {
 		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
 				      | (1 << NF_IP_POST_ROUTING)
 				      | (1 << NF_IP_PRE_ROUTING)
 				      | (1 << NF_IP_LOCAL_IN))) {
 			printk("ip_local_deliver: bad loopback skb: ");
 			debug_print_hooks_ip(skb->nf_debug);
 			nf_dump_skb(PF_INET, skb);
 		}
 	}
 	else {
 		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
 				      | (1<<NF_IP_LOCAL_IN))) {
 			printk("ip_local_deliver: bad non-lo skb: ");
 			debug_print_hooks_ip(skb->nf_debug);
 			nf_dump_skb(PF_INET, skb);
 		}
 	}
 }

 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
 {
 	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
 				 | (1 << NF_IP_POST_ROUTING))) {
 		printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
 		       newskb);
 		debug_print_hooks_ip(newskb->nf_debug);
 		nf_dump_skb(PF_INET, newskb);
 	}
 	/* Clear to avoid confusing input check */
 	newskb->nf_debug = 0;
 }

 void nf_debug_ip_finish_output2(struct sk_buff *skb)
 {
 	/* If it's owned, it must have gone through the
 	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
 	 * Otherwise, must have gone through
 	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
 	 */
 	if (skb->sk) {
 		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
 				      | (1 << NF_IP_POST_ROUTING))) {
 			printk("ip_finish_output: bad owned skb = %p: ", skb);
 			debug_print_hooks_ip(skb->nf_debug);
 			nf_dump_skb(PF_INET, skb);
 		}
 	} else {
 		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
 				      | (1 << NF_IP_FORWARD)
 				      | (1 << NF_IP_POST_ROUTING))) {
 			/* Fragments, entunnelled packets, TCP RSTs
                            generated by ipt_REJECT will have no
                            owners, but still may be local */
 			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
 					      | (1 << NF_IP_POST_ROUTING))){
 				printk("ip_finish_output:"
 				       " bad unowned skb = %p: ",skb);
 				debug_print_hooks_ip(skb->nf_debug);
 				nf_dump_skb(PF_INET, skb);
 			}
 		}
 	}
 }
 #endif /*CONFIG_NETFILTER_DEBUG*/

 /* Call get/setsockopt() */
 static int nf_sockopt(struct sock *sk, int pf, int val,
 		      char *opt, int *len, int get)
 {
 	struct list_head *i;
 	struct nf_sockopt_ops *ops;
 	int ret;

 	if (down_interruptible(&nf_sockopt_mutex) != 0)
 		return -EINTR;

 	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
 		ops = (struct nf_sockopt_ops *)i;
 		if (ops->pf == pf) {
 			if (get) {
 				if (val >= ops->get_optmin
 				    && val < ops->get_optmax) {
 					ops->use++;
 					up(&nf_sockopt_mutex);
 					ret = ops->get(sk, val, opt, len);
 					goto out;
 				}
 			} else {
 				if (val >= ops->set_optmin
 				    && val < ops->set_optmax) {
 					ops->use++;
 					up(&nf_sockopt_mutex);
 					ret = ops->set(sk, val, opt, *len);
 					goto out;
 				}
 			}
 		}
 	}
 	up(&nf_sockopt_mutex);
 	return -ENOPROTOOPT;

  out:
 	down(&nf_sockopt_mutex);
 	ops->use--;
 	if (ops->cleanup_task)
 		wake_up_process(ops->cleanup_task);
 	up(&nf_sockopt_mutex);
 	return ret;
 }

 int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
 		  int len)
 {
 	return nf_sockopt(sk, pf, val, opt, &len, 0);
 }

 int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
 {
 	return nf_sockopt(sk, pf, val, opt, len, 1);
 }

 static unsigned int nf_iterate(struct list_head *head,
 			       struct sk_buff **skb,
 			       int hook,
 			       const struct net_device *indev,
 			       const struct net_device *outdev,
 			       struct list_head **i,
 			       int (*okfn)(struct sk_buff *),
 			       int hook_thresh)
 {
 	for (*i = (*i)->next; *i != head; *i = (*i)->next) {
 		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

 		if (hook_thresh > elem->priority)
 			continue;

 		switch (elem->hook(hook, skb, indev, outdev, okfn)) {
 		case NF_QUEUE:
 			return NF_QUEUE;

 		case NF_STOLEN:
 			return NF_STOLEN;

 		case NF_DROP:
 			return NF_DROP;

 		case NF_REPEAT:
 			*i = (*i)->prev;
 			break;

 #ifdef CONFIG_NETFILTER_DEBUG
 		case NF_ACCEPT:
 			break;

 		default:
 			NFDEBUG("Evil return from %p(%u).\n",
 				elem->hook, hook);
 #endif
 		}
 	}
 	return NF_ACCEPT;
 }

 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
 {
 	int ret;

 	br_write_lock_bh(BR_NETPROTO_LOCK);
 	if (queue_handler[pf].outfn)
 		ret = -EBUSY;
 	else {
 		queue_handler[pf].outfn = outfn;
 		queue_handler[pf].data = data;
 		ret = 0;
 	}
 	br_write_unlock_bh(BR_NETPROTO_LOCK);

 	return ret;
 }

 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(int pf)
 {
 	br_write_lock_bh(BR_NETPROTO_LOCK);
 	queue_handler[pf].outfn = NULL;
 	queue_handler[pf].data = NULL;
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
 	return 0;
 }

 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
  */
 static void nf_queue(struct sk_buff *skb,
 		     struct list_head *elem,
 		     int pf, unsigned int hook,
 		     struct net_device *indev,
 		     struct net_device *outdev,
 		     int (*okfn)(struct sk_buff *))
 {
 	int status;
 	struct nf_info *info;
 	struct net_device *physindev = NULL;
 	struct net_device *physoutdev = NULL;

 	if (!queue_handler[pf].outfn) {
 		kfree_skb(skb);
 		return;
 	}

 	info = kmalloc(sizeof(*info), GFP_ATOMIC);
 	if (!info) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
 			       skb);
 		kfree_skb(skb);
 		return;
 	}

 	*info = (struct nf_info) {
 		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

 	/* Bump dev refs so they don't vanish while packet is out */
 	if (indev) dev_hold(indev);
 	if (outdev) dev_hold(outdev);

 	if (skb->nf_bridge) {
 		physindev = skb->nf_bridge->physindev;
 		if (physindev) dev_hold(physindev);
 		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev) dev_hold(physoutdev);
 	}

 	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
 	if (status < 0) {
 		/* James M doesn't say fuck enough. */
 		if (indev) dev_put(indev);
 		if (outdev) dev_put(outdev);
 		if (physindev) dev_put(physindev);
 		if (physoutdev) dev_put(physoutdev);
 		kfree(info);
 		kfree_skb(skb);
 		return;
 	}
 }

 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev,
 		 struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *),
 		 int hook_thresh)
 {
 	struct list_head *elem;
 	unsigned int verdict;
 	int ret = 0;

 	/* This stopgap cannot be removed until all the hooks are audited. */
 	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
 		kfree_skb(skb);
 		return -ENOMEM;
 	}
 	if (skb->ip_summed == CHECKSUM_HW) {
 		if (outdev == NULL) {
 			skb->ip_summed = CHECKSUM_NONE;
 		} else {
 			skb_checksum_help(skb);
 		}
 	}

 	/* We may already have this, but read-locks nest anyway */
 	br_read_lock_bh(BR_NETPROTO_LOCK);

 #ifdef CONFIG_NETFILTER_DEBUG
 	if (skb->nf_debug & (1 << hook)) {
 		printk("nf_hook: hook %i already set.\n", hook);
 		nf_dump_skb(pf, skb);
 	}
 	skb->nf_debug |= (1 << hook);
 #endif

 	elem = &nf_hooks[pf][hook];
 	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
 		nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
 	}

 	switch (verdict) {
 	case NF_ACCEPT:
 		ret = okfn(skb);
 		break;

 	case NF_DROP:
 		kfree_skb(skb);
 		ret = -EPERM;
 		break;
 	}

 	br_read_unlock_bh(BR_NETPROTO_LOCK);
 	return ret;
 }

 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
 	struct list_head *elem = &info->elem->list;
 	struct list_head *i;

 	/* We don't have BR_NETPROTO_LOCK here */
 	br_read_lock_bh(BR_NETPROTO_LOCK);
 	for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
 		if (i == &nf_hooks[info->pf][info->hook]) {
 			/* The module which sent it to userspace is gone. */
 			NFDEBUG("%s: module disappeared, dropping packet.\n",
 			         __FUNCTION__);
 			verdict = NF_DROP;
 			break;
 		}
 	}

 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT) {
 		elem = elem->prev;
 		verdict = NF_ACCEPT;
 	}

 	if (verdict == NF_ACCEPT) {
 		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
 				     &skb, info->hook,
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
 	}

 	switch (verdict) {
 	case NF_ACCEPT:
 		info->okfn(skb);
 		break;

 	case NF_QUEUE:
 		nf_queue(skb, elem, info->pf, info->hook,
 			 info->indev, info->outdev, info->okfn);
 		break;

 	case NF_DROP:
 		kfree_skb(skb);
 		break;
 	}
 	br_read_unlock_bh(BR_NETPROTO_LOCK);

 	/* Release those devices we held, or Alexey will kill me. */
 	if (info->indev) dev_put(info->indev);
 	if (info->outdev) dev_put(info->outdev);

 	kfree(info);
 	return;
 }

 #ifdef CONFIG_INET
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb)
 {
 	struct iphdr *iph = (*pskb)->nh.iph;
 	struct rtable *rt;
 	struct flowi fl = { .nl_u = { .ip4_u =
 				      { .daddr = iph->daddr,
 					.saddr = iph->saddr,
 					.tos = RT_TOS(iph->tos)|RTO_CONN,
 #ifdef CONFIG_IP_ROUTE_FWMARK
 					.fwmark = (*pskb)->nfmark
 #endif
 				      } },
 			    .oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
 			    };
 	struct net_device *dev_src = NULL;
 	int err;

 	/* accomodate ip_route_output_slow(), which expects the key src to be
 	   0 or a local address; however some non-standard hacks like
 	   ipt_REJECT.c:send_reset() can cause packets with foreign
            saddr to be appear on the NF_IP_LOCAL_OUT hook -MB */
 	if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src)))
 		fl.fl4_src = 0;

 	if ((err=ip_route_output_key(&rt, &fl)) != 0) {
 		printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n",
 			NIPQUAD(iph->daddr), NIPQUAD(iph->saddr),
 			(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
 			RT_TOS(iph->tos)|RTO_CONN,
 #ifdef CONFIG_IP_ROUTE_FWMARK
 			(*pskb)->nfmark,
 #else
 			0UL,
 #endif
 			err);
 		goto out;
 	}

 	/* Drop old route. */
 	dst_release((*pskb)->dst);

 	(*pskb)->dst = &rt->u.dst;

 	/* Change in oif may mean change in hh_len. */
 	if (skb_headroom(*pskb) < (*pskb)->dst->dev->hard_header_len) {
 		struct sk_buff *nskb;

 		nskb = skb_realloc_headroom(*pskb,
 					    (*pskb)->dst->dev->hard_header_len);
 		if (!nskb) {
 			err = -ENOMEM;
 			goto out;
 		}
 		if ((*pskb)->sk)
 			skb_set_owner_w(nskb, (*pskb)->sk);
 		kfree_skb(*pskb);
 		*pskb = nskb;
 	}

 out:
 	if (dev_src)
 		dev_put(dev_src);

 	return err;
 }
 #endif /*CONFIG_INET*/

 /* This does not belong here, but ipt_REJECT needs it if connection
    tracking in use: without this, connection may not be in hash table,
    and hence manufactured ICMP or RST packets will not be associated
    with it. */
 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);

 void __init netfilter_init(void)
 {
 	int i, h;

 	for (i = 0; i < NPROTO; i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
 			INIT_LIST_HEAD(&nf_hooks[i][h]);
 	}
 }
	/* netfilter.c: look after the filters for various protocols.
	* Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
	*
	* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
	* way.
	*
	* Rusty Russell (C)2000 -- This code is GPL.
	*
	* February 2000: Modified by James Morris to have 1 queue per protocol.
	* 15-Mar-2000: Added NF_REPEAT --RR.
	*/
	#include <linux/config.h>
	#include <linux/netfilter.h>
	#include <net/protocol.h>
	#include <linux/init.h>
	#include <linux/skbuff.h>
	#include <linux/wait.h>
	#include <linux/module.h>
	#include <linux/interrupt.h>
	#include <linux/if.h>
	#include <linux/netdevice.h>
	#include <linux/brlock.h>
	#include <linux/inetdevice.h>
	#include <net/sock.h>
	#include <net/route.h>
	#include <linux/ip.h>

	#define __KERNEL_SYSCALLS__
	#include <linux/unistd.h>

	/* In this code, we can be waiting indefinitely for userspace to
	* service a packet if a hook returns NF_QUEUE. We could keep a count
	* of skbuffs queued for userspace, and not deregister a hook unless
	* this is zero, but that sucks. Now, we simply check when the
	* packets come back: if the hook is gone, the packet is discarded. */
	#ifdef CONFIG_NETFILTER_DEBUG
	#define NFDEBUG(format, args...) printk(format , ## args)
	#else
	#define NFDEBUG(format, args...)
	#endif

	/* Sockopts only registered and called from user context, so
	BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may
	sleep. */
	static DECLARE_MUTEX(nf_sockopt_mutex);

	struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
	static LIST_HEAD(nf_sockopts);

	/*
	* A queue handler may be registered for each protocol. Each is protected by
	* long term mutex. The handler must provide an an outfn() to accept packets
	* for queueing and must reinject all packets it receives, no matter what.
	*/
	static struct nf_queue_handler_t {
	nf_queue_outfn_t outfn;
	void *data;
	} queue_handler[NPROTO];

	int nf_register_hook(struct nf_hook_ops *reg)
	{
	struct list_head *i;

	br_write_lock_bh(BR_NETPROTO_LOCK);
	for (i = nf_hooks[reg->pf][reg->hooknum].next;
	i != &nf_hooks[reg->pf][reg->hooknum];
	i = i->next) {
	if (reg->priority < ((struct nf_hook_ops *)i)->priority)
	break;
	}
	list_add(&reg->list, i->prev);
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	return 0;
	}

	void nf_unregister_hook(struct nf_hook_ops *reg)
	{
	br_write_lock_bh(BR_NETPROTO_LOCK);
	list_del(&reg->list);
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	}

	/* Do exclusive ranges overlap? */
	static inline int overlap(int min1, int max1, int min2, int max2)
	{
	return max1 > min2 && min1 < max2;
	}

	/* Functions to register sockopt ranges (exclusive). */
	int nf_register_sockopt(struct nf_sockopt_ops *reg)
	{
	struct list_head *i;
	int ret = 0;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
	return -EINTR;

	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
	struct nf_sockopt_ops ops = (struct nf_sockopt_ops )i;
	if (ops->pf == reg->pf
	&& (overlap(ops->set_optmin, ops->set_optmax,
	reg->set_optmin, reg->set_optmax)
	\|\| overlap(ops->get_optmin, ops->get_optmax,
	reg->get_optmin, reg->get_optmax))) {
	NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
	ops->set_optmin, ops->set_optmax,
	ops->get_optmin, ops->get_optmax,
	reg->set_optmin, reg->set_optmax,
	reg->get_optmin, reg->get_optmax);
	ret = -EBUSY;
	goto out;
	}
	}

	list_add(&reg->list, &nf_sockopts);
	out:
	up(&nf_sockopt_mutex);
	return ret;
	}

	void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
	{
	/* No point being interruptible: we're probably in cleanup_module() */
	restart:
	down(&nf_sockopt_mutex);
	if (reg->use != 0) {
	/* To be woken by nf_sockopt call... */
	/* FIXME: Stuart Young's name appears gratuitously. */
	set_current_state(TASK_UNINTERRUPTIBLE);
	reg->cleanup_task = current;
	up(&nf_sockopt_mutex);
	schedule();
	goto restart;
	}
	list_del(&reg->list);
	up(&nf_sockopt_mutex);
	}

	#ifdef CONFIG_NETFILTER_DEBUG
	#include <net/ip.h>
	#include <net/route.h>
	#include <net/tcp.h>
	#include <linux/netfilter_ipv4.h>

	static void debug_print_hooks_ip(unsigned int nf_debug)
	{
	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
	printk("PRE_ROUTING ");
	nf_debug ^= (1 << NF_IP_PRE_ROUTING);
	}
	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
	printk("LOCAL_IN ");
	nf_debug ^= (1 << NF_IP_LOCAL_IN);
	}
	if (nf_debug & (1 << NF_IP_FORWARD)) {
	printk("FORWARD ");
	nf_debug ^= (1 << NF_IP_FORWARD);
	}
	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
	printk("LOCAL_OUT ");
	nf_debug ^= (1 << NF_IP_LOCAL_OUT);
	}
	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
	printk("POST_ROUTING ");
	nf_debug ^= (1 << NF_IP_POST_ROUTING);
	}
	if (nf_debug)
	printk("Crap bits: 0x%04X", nf_debug);
	printk("\n");
	}

	void nf_dump_skb(int pf, struct sk_buff *skb)
	{
	printk("skb: pf=%i %s dev=%s len=%u\n",
	pf,
	skb->sk ? "(owned)" : "(unowned)",
	skb->dev ? skb->dev->name : "(no dev)",
	skb->len);
	switch (pf) {
	case PF_INET: {
	const struct iphdr *ip = skb->nh.iph;
	__u32 opt = (__u32 ) (ip + 1);
	int opti;
	__u16 src_port = 0, dst_port = 0;

	if (ip->protocol == IPPROTO_TCP
	\|\| ip->protocol == IPPROTO_UDP) {
	struct tcphdr tcp=(struct tcphdr )((__u32 *)ip+ip->ihl);
	src_port = ntohs(tcp->source);
	dst_port = ntohs(tcp->dest);
	}

	printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
	" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
	ip->protocol, NIPQUAD(ip->saddr),
	src_port, NIPQUAD(ip->daddr),
	dst_port,
	ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
	ntohs(ip->frag_off), ip->ttl);

	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
	printk(" O=0x%8.8X", *opt++);
	printk("\n");
	}
	}
	}

	void nf_debug_ip_local_deliver(struct sk_buff *skb)
	{
	/* If it's a loopback packet, it must have come through
	* NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
	* NF_IP_LOCAL_IN. Otherwise, must have gone through
	* NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
	if (!skb->dev) {
	printk("ip_local_deliver: skb->dev is NULL.\n");
	}
	else if (strcmp(skb->dev->name, "lo") == 0) {
	if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
	\| (1 << NF_IP_POST_ROUTING)
	\| (1 << NF_IP_PRE_ROUTING)
	\| (1 << NF_IP_LOCAL_IN))) {
	printk("ip_local_deliver: bad loopback skb: ");
	debug_print_hooks_ip(skb->nf_debug);
	nf_dump_skb(PF_INET, skb);
	}
	}
	else {
	if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
	\| (1<<NF_IP_LOCAL_IN))) {
	printk("ip_local_deliver: bad non-lo skb: ");
	debug_print_hooks_ip(skb->nf_debug);
	nf_dump_skb(PF_INET, skb);
	}
	}
	}

	void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
	{
	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
	\| (1 << NF_IP_POST_ROUTING))) {
	printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
	newskb);
	debug_print_hooks_ip(newskb->nf_debug);
	nf_dump_skb(PF_INET, newskb);
	}
	/* Clear to avoid confusing input check */
	newskb->nf_debug = 0;
	}

	void nf_debug_ip_finish_output2(struct sk_buff *skb)
	{
	/* If it's owned, it must have gone through the
	* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
	* Otherwise, must have gone through
	* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
	*/
	if (skb->sk) {
	if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
	\| (1 << NF_IP_POST_ROUTING))) {
	printk("ip_finish_output: bad owned skb = %p: ", skb);
	debug_print_hooks_ip(skb->nf_debug);
	nf_dump_skb(PF_INET, skb);
	}
	} else {
	if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
	\| (1 << NF_IP_FORWARD)
	\| (1 << NF_IP_POST_ROUTING))) {
	/* Fragments, entunnelled packets, TCP RSTs
	generated by ipt_REJECT will have no
	owners, but still may be local */
	if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
	\| (1 << NF_IP_POST_ROUTING))){
	printk("ip_finish_output:"
	" bad unowned skb = %p: ",skb);
	debug_print_hooks_ip(skb->nf_debug);
	nf_dump_skb(PF_INET, skb);
	}
	}
	}
	}
	#endif /CONFIG_NETFILTER_DEBUG/

	/* Call get/setsockopt() */
	static int nf_sockopt(struct sock *sk, int pf, int val,
	char opt, int len, int get)
	{
	struct list_head *i;
	struct nf_sockopt_ops *ops;
	int ret;

	if (down_interruptible(&nf_sockopt_mutex) != 0)
	return -EINTR;

	for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
	ops = (struct nf_sockopt_ops *)i;
	if (ops->pf == pf) {
	if (get) {
	if (val >= ops->get_optmin
	&& val < ops->get_optmax) {
	ops->use++;
	up(&nf_sockopt_mutex);
	ret = ops->get(sk, val, opt, len);
	goto out;
	}
	} else {
	if (val >= ops->set_optmin
	&& val < ops->set_optmax) {
	ops->use++;
	up(&nf_sockopt_mutex);
	ret = ops->set(sk, val, opt, *len);
	goto out;
	}
	}
	}
	}
	up(&nf_sockopt_mutex);
	return -ENOPROTOOPT;

	out:
	down(&nf_sockopt_mutex);
	ops->use--;
	if (ops->cleanup_task)
	wake_up_process(ops->cleanup_task);
	up(&nf_sockopt_mutex);
	return ret;
	}

	int nf_setsockopt(struct sock sk, int pf, int val, char opt,
	int len)
	{
	return nf_sockopt(sk, pf, val, opt, &len, 0);
	}

	int nf_getsockopt(struct sock sk, int pf, int val, char opt, int *len)
	{
	return nf_sockopt(sk, pf, val, opt, len, 1);
	}

	static unsigned int nf_iterate(struct list_head *head,
	struct sk_buff **skb,
	int hook,
	const struct net_device *indev,
	const struct net_device *outdev,
	struct list_head **i,
	int (okfn)(struct sk_buff ),
	int hook_thresh)
	{
	for (i = (i)->next; i != head; i = (*i)->next) {
	struct nf_hook_ops elem = (struct nf_hook_ops )*i;

	if (hook_thresh > elem->priority)
	continue;

	switch (elem->hook(hook, skb, indev, outdev, okfn)) {
	case NF_QUEUE:
	return NF_QUEUE;

	case NF_STOLEN:
	return NF_STOLEN;

	case NF_DROP:
	return NF_DROP;

	case NF_REPEAT:
	i = (i)->prev;
	break;

	#ifdef CONFIG_NETFILTER_DEBUG
	case NF_ACCEPT:
	break;

	default:
	NFDEBUG("Evil return from %p(%u).\n",
	elem->hook, hook);
	#endif
	}
	}
	return NF_ACCEPT;
	}

	int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
	{
	int ret;

	br_write_lock_bh(BR_NETPROTO_LOCK);
	if (queue_handler[pf].outfn)
	ret = -EBUSY;
	else {
	queue_handler[pf].outfn = outfn;
	queue_handler[pf].data = data;
	ret = 0;
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	return ret;
	}

	/* The caller must flush their queue before this */
	int nf_unregister_queue_handler(int pf)
	{
	br_write_lock_bh(BR_NETPROTO_LOCK);
	queue_handler[pf].outfn = NULL;
	queue_handler[pf].data = NULL;
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	return 0;
	}

	/*
	* Any packet that leaves via this function must come back
	* through nf_reinject().
	*/
	static void nf_queue(struct sk_buff *skb,
	struct list_head *elem,
	int pf, unsigned int hook,
	struct net_device *indev,
	struct net_device *outdev,
	int (okfn)(struct sk_buff ))
	{
	int status;
	struct nf_info *info;
	struct net_device *physindev = NULL;
	struct net_device *physoutdev = NULL;

	if (!queue_handler[pf].outfn) {
	kfree_skb(skb);
	return;
	}

	info = kmalloc(sizeof(*info), GFP_ATOMIC);
	if (!info) {
	if (net_ratelimit())
	printk(KERN_ERR "OOM queueing packet %p\n",
	skb);
	kfree_skb(skb);
	return;
	}

	*info = (struct nf_info) {
	(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

	/* Bump dev refs so they don't vanish while packet is out */
	if (indev) dev_hold(indev);
	if (outdev) dev_hold(outdev);

	if (skb->nf_bridge) {
	physindev = skb->nf_bridge->physindev;
	if (physindev) dev_hold(physindev);
	physoutdev = skb->nf_bridge->physoutdev;
	if (physoutdev) dev_hold(physoutdev);
	}

	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
	if (status < 0) {
	/* James M doesn't say fuck enough. */
	if (indev) dev_put(indev);
	if (outdev) dev_put(outdev);
	if (physindev) dev_put(physindev);
	if (physoutdev) dev_put(physoutdev);
	kfree(info);
	kfree_skb(skb);
	return;
	}
	}

	int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
	struct net_device *indev,
	struct net_device *outdev,
	int (okfn)(struct sk_buff ),
	int hook_thresh)
	{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* This stopgap cannot be removed until all the hooks are audited. */
	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
	kfree_skb(skb);
	return -ENOMEM;
	}
	if (skb->ip_summed == CHECKSUM_HW) {
	if (outdev == NULL) {
	skb->ip_summed = CHECKSUM_NONE;
	} else {
	skb_checksum_help(skb);
	}
	}

	/* We may already have this, but read-locks nest anyway */
	br_read_lock_bh(BR_NETPROTO_LOCK);

	#ifdef CONFIG_NETFILTER_DEBUG
	if (skb->nf_debug & (1 << hook)) {
	printk("nf_hook: hook %i already set.\n", hook);
	nf_dump_skb(pf, skb);
	}
	skb->nf_debug \|= (1 << hook);
	#endif

	elem = &nf_hooks[pf][hook];
	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
	outdev, &elem, okfn, hook_thresh);
	if (verdict == NF_QUEUE) {
	NFDEBUG("nf_hook: Verdict = QUEUE.\n");
	nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
	}

	switch (verdict) {
	case NF_ACCEPT:
	ret = okfn(skb);
	break;

	case NF_DROP:
	kfree_skb(skb);
	ret = -EPERM;
	break;
	}

	br_read_unlock_bh(BR_NETPROTO_LOCK);
	return ret;
	}

	void nf_reinject(struct sk_buff skb, struct nf_info info,
	unsigned int verdict)
	{
	struct list_head *elem = &info->elem->list;
	struct list_head *i;

	/* We don't have BR_NETPROTO_LOCK here */
	br_read_lock_bh(BR_NETPROTO_LOCK);
	for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
	if (i == &nf_hooks[info->pf][info->hook]) {
	/* The module which sent it to userspace is gone. */
	NFDEBUG("%s: module disappeared, dropping packet.\n",
	__FUNCTION__);
	verdict = NF_DROP;
	break;
	}
	}

	/* Continue traversal iff userspace said ok... */
	if (verdict == NF_REPEAT) {
	elem = elem->prev;
	verdict = NF_ACCEPT;
	}

	if (verdict == NF_ACCEPT) {
	verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
	&skb, info->hook,
	info->indev, info->outdev, &elem,
	info->okfn, INT_MIN);
	}

	switch (verdict) {
	case NF_ACCEPT:
	info->okfn(skb);
	break;

	case NF_QUEUE:
	nf_queue(skb, elem, info->pf, info->hook,
	info->indev, info->outdev, info->okfn);
	break;

	case NF_DROP:
	kfree_skb(skb);
	break;
	}
	br_read_unlock_bh(BR_NETPROTO_LOCK);

	/* Release those devices we held, or Alexey will kill me. */
	if (info->indev) dev_put(info->indev);
	if (info->outdev) dev_put(info->outdev);

	kfree(info);
	return;
	}

	#ifdef CONFIG_INET
	/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
	int ip_route_me_harder(struct sk_buff **pskb)
	{
	struct iphdr iph = (pskb)->nh.iph;
	struct rtable *rt;
	struct flowi fl = { .nl_u = { .ip4_u =
	{ .daddr = iph->daddr,
	.saddr = iph->saddr,
	.tos = RT_TOS(iph->tos)\|RTO_CONN,
	#ifdef CONFIG_IP_ROUTE_FWMARK
	.fwmark = (*pskb)->nfmark
	#endif
	} },
	.oif = (pskb)->sk ? (pskb)->sk->bound_dev_if : 0,
	};
	struct net_device *dev_src = NULL;
	int err;

	/* accomodate ip_route_output_slow(), which expects the key src to be
	0 or a local address; however some non-standard hacks like
	ipt_REJECT.c:send_reset() can cause packets with foreign
	saddr to be appear on the NF_IP_LOCAL_OUT hook -MB */
	if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src)))
	fl.fl4_src = 0;

	if ((err=ip_route_output_key(&rt, &fl)) != 0) {
	printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n",
	NIPQUAD(iph->daddr), NIPQUAD(iph->saddr),
	(pskb)->sk ? (pskb)->sk->bound_dev_if : 0,
	RT_TOS(iph->tos)\|RTO_CONN,
	#ifdef CONFIG_IP_ROUTE_FWMARK
	(*pskb)->nfmark,
	#else
	0UL,
	#endif
	err);
	goto out;
	}

	/* Drop old route. */
	dst_release((*pskb)->dst);

	(*pskb)->dst = &rt->u.dst;

	/* Change in oif may mean change in hh_len. */
	if (skb_headroom(pskb) < (pskb)->dst->dev->hard_header_len) {
	struct sk_buff *nskb;

	nskb = skb_realloc_headroom(*pskb,
	(*pskb)->dst->dev->hard_header_len);
	if (!nskb) {
	err = -ENOMEM;
	goto out;
	}
	if ((*pskb)->sk)
	skb_set_owner_w(nskb, (*pskb)->sk);
	kfree_skb(*pskb);
	*pskb = nskb;
	}

	out:
	if (dev_src)
	dev_put(dev_src);

	return err;
	}
	#endif /CONFIG_INET/

	/* This does not belong here, but ipt_REJECT needs it if connection
	tracking in use: without this, connection may not be in hash table,
	and hence manufactured ICMP or RST packets will not be associated
	with it. */
	void (ip_ct_attach)(struct sk_buff , struct nf_ct_info *);

	void __init netfilter_init(void)
	{
	int i, h;

	for (i = 0; i < NPROTO; i++) {
	for (h = 0; h < NF_MAX_HOOKS; h++)
	INIT_LIST_HEAD(&nf_hooks[i][h]);
	}
	}