| /* netfilter.c: look after the filters for various protocols. |
| * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. |
| * |
| * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any |
| * way. |
| * |
| * Rusty Russell (C)2000 -- This code is GPL. |
| * |
| * February 2000: Modified by James Morris to have 1 queue per protocol. |
| * 15-Mar-2000: Added NF_REPEAT --RR. |
| */ |
| #include <linux/config.h> |
| #include <linux/netfilter.h> |
| #include <net/protocol.h> |
| #include <linux/init.h> |
| #include <linux/skbuff.h> |
| #include <linux/wait.h> |
| #include <linux/module.h> |
| #include <linux/interrupt.h> |
| #include <linux/if.h> |
| #include <linux/netdevice.h> |
| #include <linux/brlock.h> |
| #include <linux/inetdevice.h> |
| #include <net/sock.h> |
| #include <net/route.h> |
| #include <linux/ip.h> |
| |
| #define __KERNEL_SYSCALLS__ |
| #include <linux/unistd.h> |
| |
| /* In this code, we can be waiting indefinitely for userspace to |
| * service a packet if a hook returns NF_QUEUE. We could keep a count |
| * of skbuffs queued for userspace, and not deregister a hook unless |
| * this is zero, but that sucks. Now, we simply check when the |
| * packets come back: if the hook is gone, the packet is discarded. */ |
| #ifdef CONFIG_NETFILTER_DEBUG |
| #define NFDEBUG(format, args...) printk(format , ## args) |
| #else |
| #define NFDEBUG(format, args...) |
| #endif |
| |
| /* Sockopts only registered and called from user context, so |
| BR_NETPROTO_LOCK would be overkill. Also, [gs]etsockopt calls may |
| sleep. */ |
| static DECLARE_MUTEX(nf_sockopt_mutex); |
| |
| struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; |
| static LIST_HEAD(nf_sockopts); |
| |
| /* |
| * A queue handler may be registered for each protocol. Each is protected by |
| * long term mutex. The handler must provide an an outfn() to accept packets |
| * for queueing and must reinject all packets it receives, no matter what. |
| */ |
| static struct nf_queue_handler_t { |
| nf_queue_outfn_t outfn; |
| void *data; |
| } queue_handler[NPROTO]; |
| |
| int nf_register_hook(struct nf_hook_ops *reg) |
| { |
| struct list_head *i; |
| |
| br_write_lock_bh(BR_NETPROTO_LOCK); |
| for (i = nf_hooks[reg->pf][reg->hooknum].next; |
| i != &nf_hooks[reg->pf][reg->hooknum]; |
| i = i->next) { |
| if (reg->priority < ((struct nf_hook_ops *)i)->priority) |
| break; |
| } |
| list_add(®->list, i->prev); |
| br_write_unlock_bh(BR_NETPROTO_LOCK); |
| return 0; |
| } |
| |
| void nf_unregister_hook(struct nf_hook_ops *reg) |
| { |
| br_write_lock_bh(BR_NETPROTO_LOCK); |
| list_del(®->list); |
| br_write_unlock_bh(BR_NETPROTO_LOCK); |
| } |
| |
/* Return 1 if the ranges [min1, max1) and [min2, max2) share at least
 * one value (upper bounds are exclusive), 0 otherwise. */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	if (max1 <= min2)
		return 0;	/* first range ends before the second starts */
	if (min1 >= max2)
		return 0;	/* first range starts after the second ends */
	return 1;
}
| |
| /* Functions to register sockopt ranges (exclusive). */ |
| int nf_register_sockopt(struct nf_sockopt_ops *reg) |
| { |
| struct list_head *i; |
| int ret = 0; |
| |
| if (down_interruptible(&nf_sockopt_mutex) != 0) |
| return -EINTR; |
| |
| for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { |
| struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; |
| if (ops->pf == reg->pf |
| && (overlap(ops->set_optmin, ops->set_optmax, |
| reg->set_optmin, reg->set_optmax) |
| || overlap(ops->get_optmin, ops->get_optmax, |
| reg->get_optmin, reg->get_optmax))) { |
| NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", |
| ops->set_optmin, ops->set_optmax, |
| ops->get_optmin, ops->get_optmax, |
| reg->set_optmin, reg->set_optmax, |
| reg->get_optmin, reg->get_optmax); |
| ret = -EBUSY; |
| goto out; |
| } |
| } |
| |
| list_add(®->list, &nf_sockopts); |
| out: |
| up(&nf_sockopt_mutex); |
| return ret; |
| } |
| |
| void nf_unregister_sockopt(struct nf_sockopt_ops *reg) |
| { |
| /* No point being interruptible: we're probably in cleanup_module() */ |
| restart: |
| down(&nf_sockopt_mutex); |
| if (reg->use != 0) { |
| /* To be woken by nf_sockopt call... */ |
| /* FIXME: Stuart Young's name appears gratuitously. */ |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| reg->cleanup_task = current; |
| up(&nf_sockopt_mutex); |
| schedule(); |
| goto restart; |
| } |
| list_del(®->list); |
| up(&nf_sockopt_mutex); |
| } |
| |
| #ifdef CONFIG_NETFILTER_DEBUG |
| #include <net/ip.h> |
| #include <net/route.h> |
| #include <net/tcp.h> |
| #include <linux/netfilter_ipv4.h> |
| |
| static void debug_print_hooks_ip(unsigned int nf_debug) |
| { |
| if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { |
| printk("PRE_ROUTING "); |
| nf_debug ^= (1 << NF_IP_PRE_ROUTING); |
| } |
| if (nf_debug & (1 << NF_IP_LOCAL_IN)) { |
| printk("LOCAL_IN "); |
| nf_debug ^= (1 << NF_IP_LOCAL_IN); |
| } |
| if (nf_debug & (1 << NF_IP_FORWARD)) { |
| printk("FORWARD "); |
| nf_debug ^= (1 << NF_IP_FORWARD); |
| } |
| if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { |
| printk("LOCAL_OUT "); |
| nf_debug ^= (1 << NF_IP_LOCAL_OUT); |
| } |
| if (nf_debug & (1 << NF_IP_POST_ROUTING)) { |
| printk("POST_ROUTING "); |
| nf_debug ^= (1 << NF_IP_POST_ROUTING); |
| } |
| if (nf_debug) |
| printk("Crap bits: 0x%04X", nf_debug); |
| printk("\n"); |
| } |
| |
| void nf_dump_skb(int pf, struct sk_buff *skb) |
| { |
| printk("skb: pf=%i %s dev=%s len=%u\n", |
| pf, |
| skb->sk ? "(owned)" : "(unowned)", |
| skb->dev ? skb->dev->name : "(no dev)", |
| skb->len); |
| switch (pf) { |
| case PF_INET: { |
| const struct iphdr *ip = skb->nh.iph; |
| __u32 *opt = (__u32 *) (ip + 1); |
| int opti; |
| __u16 src_port = 0, dst_port = 0; |
| |
| if (ip->protocol == IPPROTO_TCP |
| || ip->protocol == IPPROTO_UDP) { |
| struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); |
| src_port = ntohs(tcp->source); |
| dst_port = ntohs(tcp->dest); |
| } |
| |
| printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" |
| " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", |
| ip->protocol, NIPQUAD(ip->saddr), |
| src_port, NIPQUAD(ip->daddr), |
| dst_port, |
| ntohs(ip->tot_len), ip->tos, ntohs(ip->id), |
| ntohs(ip->frag_off), ip->ttl); |
| |
| for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) |
| printk(" O=0x%8.8X", *opt++); |
| printk("\n"); |
| } |
| } |
| } |
| |
| void nf_debug_ip_local_deliver(struct sk_buff *skb) |
| { |
| /* If it's a loopback packet, it must have come through |
| * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and |
| * NF_IP_LOCAL_IN. Otherwise, must have gone through |
| * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ |
| if (!skb->dev) { |
| printk("ip_local_deliver: skb->dev is NULL.\n"); |
| } |
| else if (strcmp(skb->dev->name, "lo") == 0) { |
| if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| | (1 << NF_IP_POST_ROUTING) |
| | (1 << NF_IP_PRE_ROUTING) |
| | (1 << NF_IP_LOCAL_IN))) { |
| printk("ip_local_deliver: bad loopback skb: "); |
| debug_print_hooks_ip(skb->nf_debug); |
| nf_dump_skb(PF_INET, skb); |
| } |
| } |
| else { |
| if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING) |
| | (1<<NF_IP_LOCAL_IN))) { |
| printk("ip_local_deliver: bad non-lo skb: "); |
| debug_print_hooks_ip(skb->nf_debug); |
| nf_dump_skb(PF_INET, skb); |
| } |
| } |
| } |
| |
| void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) |
| { |
| if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| | (1 << NF_IP_POST_ROUTING))) { |
| printk("ip_dev_loopback_xmit: bad owned skb = %p: ", |
| newskb); |
| debug_print_hooks_ip(newskb->nf_debug); |
| nf_dump_skb(PF_INET, newskb); |
| } |
| /* Clear to avoid confusing input check */ |
| newskb->nf_debug = 0; |
| } |
| |
| void nf_debug_ip_finish_output2(struct sk_buff *skb) |
| { |
| /* If it's owned, it must have gone through the |
| * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. |
| * Otherwise, must have gone through |
| * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. |
| */ |
| if (skb->sk) { |
| if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| | (1 << NF_IP_POST_ROUTING))) { |
| printk("ip_finish_output: bad owned skb = %p: ", skb); |
| debug_print_hooks_ip(skb->nf_debug); |
| nf_dump_skb(PF_INET, skb); |
| } |
| } else { |
| if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) |
| | (1 << NF_IP_FORWARD) |
| | (1 << NF_IP_POST_ROUTING))) { |
| /* Fragments, entunnelled packets, TCP RSTs |
| generated by ipt_REJECT will have no |
| owners, but still may be local */ |
| if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| | (1 << NF_IP_POST_ROUTING))){ |
| printk("ip_finish_output:" |
| " bad unowned skb = %p: ",skb); |
| debug_print_hooks_ip(skb->nf_debug); |
| nf_dump_skb(PF_INET, skb); |
| } |
| } |
| } |
| } |
| #endif /*CONFIG_NETFILTER_DEBUG*/ |
| |
| /* Call get/setsockopt() */ |
| static int nf_sockopt(struct sock *sk, int pf, int val, |
| char *opt, int *len, int get) |
| { |
| struct list_head *i; |
| struct nf_sockopt_ops *ops; |
| int ret; |
| |
| if (down_interruptible(&nf_sockopt_mutex) != 0) |
| return -EINTR; |
| |
| for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { |
| ops = (struct nf_sockopt_ops *)i; |
| if (ops->pf == pf) { |
| if (get) { |
| if (val >= ops->get_optmin |
| && val < ops->get_optmax) { |
| ops->use++; |
| up(&nf_sockopt_mutex); |
| ret = ops->get(sk, val, opt, len); |
| goto out; |
| } |
| } else { |
| if (val >= ops->set_optmin |
| && val < ops->set_optmax) { |
| ops->use++; |
| up(&nf_sockopt_mutex); |
| ret = ops->set(sk, val, opt, *len); |
| goto out; |
| } |
| } |
| } |
| } |
| up(&nf_sockopt_mutex); |
| return -ENOPROTOOPT; |
| |
| out: |
| down(&nf_sockopt_mutex); |
| ops->use--; |
| if (ops->cleanup_task) |
| wake_up_process(ops->cleanup_task); |
| up(&nf_sockopt_mutex); |
| return ret; |
| } |
| |
/* setsockopt() entry point: forwards to nf_sockopt() with get == 0. */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	/* nf_sockopt() takes the length by pointer for both directions. */
	int optlen = len;

	return nf_sockopt(sk, pf, val, opt, &optlen, 0);
}
| |
/* getsockopt() entry point: forwards to nf_sockopt() with get == 1. */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	int ret;

	ret = nf_sockopt(sk, pf, val, opt, len, 1);
	return ret;
}
| |
| static unsigned int nf_iterate(struct list_head *head, |
| struct sk_buff **skb, |
| int hook, |
| const struct net_device *indev, |
| const struct net_device *outdev, |
| struct list_head **i, |
| int (*okfn)(struct sk_buff *), |
| int hook_thresh) |
| { |
| for (*i = (*i)->next; *i != head; *i = (*i)->next) { |
| struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; |
| |
| if (hook_thresh > elem->priority) |
| continue; |
| |
| switch (elem->hook(hook, skb, indev, outdev, okfn)) { |
| case NF_QUEUE: |
| return NF_QUEUE; |
| |
| case NF_STOLEN: |
| return NF_STOLEN; |
| |
| case NF_DROP: |
| return NF_DROP; |
| |
| case NF_REPEAT: |
| *i = (*i)->prev; |
| break; |
| |
| #ifdef CONFIG_NETFILTER_DEBUG |
| case NF_ACCEPT: |
| break; |
| |
| default: |
| NFDEBUG("Evil return from %p(%u).\n", |
| elem->hook, hook); |
| #endif |
| } |
| } |
| return NF_ACCEPT; |
| } |
| |
| int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) |
| { |
| int ret; |
| |
| br_write_lock_bh(BR_NETPROTO_LOCK); |
| if (queue_handler[pf].outfn) |
| ret = -EBUSY; |
| else { |
| queue_handler[pf].outfn = outfn; |
| queue_handler[pf].data = data; |
| ret = 0; |
| } |
| br_write_unlock_bh(BR_NETPROTO_LOCK); |
| |
| return ret; |
| } |
| |
| /* The caller must flush their queue before this */ |
| int nf_unregister_queue_handler(int pf) |
| { |
| br_write_lock_bh(BR_NETPROTO_LOCK); |
| queue_handler[pf].outfn = NULL; |
| queue_handler[pf].data = NULL; |
| br_write_unlock_bh(BR_NETPROTO_LOCK); |
| return 0; |
| } |
| |
| /* |
| * Any packet that leaves via this function must come back |
| * through nf_reinject(). |
| */ |
| static void nf_queue(struct sk_buff *skb, |
| struct list_head *elem, |
| int pf, unsigned int hook, |
| struct net_device *indev, |
| struct net_device *outdev, |
| int (*okfn)(struct sk_buff *)) |
| { |
| int status; |
| struct nf_info *info; |
| struct net_device *physindev = NULL; |
| struct net_device *physoutdev = NULL; |
| |
| if (!queue_handler[pf].outfn) { |
| kfree_skb(skb); |
| return; |
| } |
| |
| info = kmalloc(sizeof(*info), GFP_ATOMIC); |
| if (!info) { |
| if (net_ratelimit()) |
| printk(KERN_ERR "OOM queueing packet %p\n", |
| skb); |
| kfree_skb(skb); |
| return; |
| } |
| |
| *info = (struct nf_info) { |
| (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; |
| |
| /* Bump dev refs so they don't vanish while packet is out */ |
| if (indev) dev_hold(indev); |
| if (outdev) dev_hold(outdev); |
| |
| if (skb->nf_bridge) { |
| physindev = skb->nf_bridge->physindev; |
| if (physindev) dev_hold(physindev); |
| physoutdev = skb->nf_bridge->physoutdev; |
| if (physoutdev) dev_hold(physoutdev); |
| } |
| |
| status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); |
| if (status < 0) { |
| /* James M doesn't say fuck enough. */ |
| if (indev) dev_put(indev); |
| if (outdev) dev_put(outdev); |
| if (physindev) dev_put(physindev); |
| if (physoutdev) dev_put(physoutdev); |
| kfree(info); |
| kfree_skb(skb); |
| return; |
| } |
| } |
| |
| int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, |
| struct net_device *indev, |
| struct net_device *outdev, |
| int (*okfn)(struct sk_buff *), |
| int hook_thresh) |
| { |
| struct list_head *elem; |
| unsigned int verdict; |
| int ret = 0; |
| |
| /* This stopgap cannot be removed until all the hooks are audited. */ |
| if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { |
| kfree_skb(skb); |
| return -ENOMEM; |
| } |
| if (skb->ip_summed == CHECKSUM_HW) { |
| if (outdev == NULL) { |
| skb->ip_summed = CHECKSUM_NONE; |
| } else { |
| skb_checksum_help(skb); |
| } |
| } |
| |
| /* We may already have this, but read-locks nest anyway */ |
| br_read_lock_bh(BR_NETPROTO_LOCK); |
| |
| #ifdef CONFIG_NETFILTER_DEBUG |
| if (skb->nf_debug & (1 << hook)) { |
| printk("nf_hook: hook %i already set.\n", hook); |
| nf_dump_skb(pf, skb); |
| } |
| skb->nf_debug |= (1 << hook); |
| #endif |
| |
| elem = &nf_hooks[pf][hook]; |
| verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, |
| outdev, &elem, okfn, hook_thresh); |
| if (verdict == NF_QUEUE) { |
| NFDEBUG("nf_hook: Verdict = QUEUE.\n"); |
| nf_queue(skb, elem, pf, hook, indev, outdev, okfn); |
| } |
| |
| switch (verdict) { |
| case NF_ACCEPT: |
| ret = okfn(skb); |
| break; |
| |
| case NF_DROP: |
| kfree_skb(skb); |
| ret = -EPERM; |
| break; |
| } |
| |
| br_read_unlock_bh(BR_NETPROTO_LOCK); |
| return ret; |
| } |
| |
| void nf_reinject(struct sk_buff *skb, struct nf_info *info, |
| unsigned int verdict) |
| { |
| struct list_head *elem = &info->elem->list; |
| struct list_head *i; |
| |
| /* We don't have BR_NETPROTO_LOCK here */ |
| br_read_lock_bh(BR_NETPROTO_LOCK); |
| for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) { |
| if (i == &nf_hooks[info->pf][info->hook]) { |
| /* The module which sent it to userspace is gone. */ |
| NFDEBUG("%s: module disappeared, dropping packet.\n", |
| __FUNCTION__); |
| verdict = NF_DROP; |
| break; |
| } |
| } |
| |
| /* Continue traversal iff userspace said ok... */ |
| if (verdict == NF_REPEAT) { |
| elem = elem->prev; |
| verdict = NF_ACCEPT; |
| } |
| |
| if (verdict == NF_ACCEPT) { |
| verdict = nf_iterate(&nf_hooks[info->pf][info->hook], |
| &skb, info->hook, |
| info->indev, info->outdev, &elem, |
| info->okfn, INT_MIN); |
| } |
| |
| switch (verdict) { |
| case NF_ACCEPT: |
| info->okfn(skb); |
| break; |
| |
| case NF_QUEUE: |
| nf_queue(skb, elem, info->pf, info->hook, |
| info->indev, info->outdev, info->okfn); |
| break; |
| |
| case NF_DROP: |
| kfree_skb(skb); |
| break; |
| } |
| br_read_unlock_bh(BR_NETPROTO_LOCK); |
| |
| /* Release those devices we held, or Alexey will kill me. */ |
| if (info->indev) dev_put(info->indev); |
| if (info->outdev) dev_put(info->outdev); |
| |
| kfree(info); |
| return; |
| } |
| |
| #ifdef CONFIG_INET |
| /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
| int ip_route_me_harder(struct sk_buff **pskb) |
| { |
| struct iphdr *iph = (*pskb)->nh.iph; |
| struct rtable *rt; |
| struct flowi fl = { .nl_u = { .ip4_u = |
| { .daddr = iph->daddr, |
| .saddr = iph->saddr, |
| .tos = RT_TOS(iph->tos)|RTO_CONN, |
| #ifdef CONFIG_IP_ROUTE_FWMARK |
| .fwmark = (*pskb)->nfmark |
| #endif |
| } }, |
| .oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0, |
| }; |
| struct net_device *dev_src = NULL; |
| int err; |
| |
| /* accomodate ip_route_output_slow(), which expects the key src to be |
| 0 or a local address; however some non-standard hacks like |
| ipt_REJECT.c:send_reset() can cause packets with foreign |
| saddr to be appear on the NF_IP_LOCAL_OUT hook -MB */ |
| if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src))) |
| fl.fl4_src = 0; |
| |
| if ((err=ip_route_output_key(&rt, &fl)) != 0) { |
| printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n", |
| NIPQUAD(iph->daddr), NIPQUAD(iph->saddr), |
| (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0, |
| RT_TOS(iph->tos)|RTO_CONN, |
| #ifdef CONFIG_IP_ROUTE_FWMARK |
| (*pskb)->nfmark, |
| #else |
| 0UL, |
| #endif |
| err); |
| goto out; |
| } |
| |
| /* Drop old route. */ |
| dst_release((*pskb)->dst); |
| |
| (*pskb)->dst = &rt->u.dst; |
| |
| /* Change in oif may mean change in hh_len. */ |
| if (skb_headroom(*pskb) < (*pskb)->dst->dev->hard_header_len) { |
| struct sk_buff *nskb; |
| |
| nskb = skb_realloc_headroom(*pskb, |
| (*pskb)->dst->dev->hard_header_len); |
| if (!nskb) { |
| err = -ENOMEM; |
| goto out; |
| } |
| if ((*pskb)->sk) |
| skb_set_owner_w(nskb, (*pskb)->sk); |
| kfree_skb(*pskb); |
| *pskb = nskb; |
| } |
| |
| out: |
| if (dev_src) |
| dev_put(dev_src); |
| |
| return err; |
| } |
| #endif /*CONFIG_INET*/ |
| |
/* This does not belong here, but ipt_REJECT needs it if connection
 * tracking is in use: without this, the connection may not be in the
 * hash table, and hence manufactured ICMP or RST packets will not be
 * associated with it.  Nothing in this file assigns or calls the
 * pointer. */
void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
| |
| void __init netfilter_init(void) |
| { |
| int i, h; |
| |
| for (i = 0; i < NPROTO; i++) { |
| for (h = 0; h < NF_MAX_HOOKS; h++) |
| INIT_LIST_HEAD(&nf_hooks[i][h]); |
| } |
| } |