/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
 */
| 13 | #include <linux/config.h> |
| 14 | #include <linux/kernel.h> |
| 15 | #include <linux/netfilter.h> |
| 16 | #include <net/protocol.h> |
| 17 | #include <linux/init.h> |
| 18 | #include <linux/skbuff.h> |
| 19 | #include <linux/wait.h> |
| 20 | #include <linux/module.h> |
| 21 | #include <linux/interrupt.h> |
| 22 | #include <linux/if.h> |
| 23 | #include <linux/netdevice.h> |
| 24 | #include <linux/inetdevice.h> |
| 25 | #include <linux/tcp.h> |
| 26 | #include <linux/udp.h> |
| 27 | #include <linux/icmp.h> |
| 28 | #include <net/sock.h> |
| 29 | #include <net/route.h> |
| 30 | #include <linux/ip.h> |
| 31 | |
/* In this code, we can be waiting indefinitely for userspace to
 * service a packet if a hook returns NF_QUEUE.  We could keep a count
 * of skbuffs queued for userspace, and not deregister a hook unless
 * this is zero, but that sucks.  Now, we simply check when the
 * packets come back: if the hook is gone, the packet is discarded. */
/* NFDEBUG(): printk() when CONFIG_NETFILTER_DEBUG is set, otherwise nothing. */
#ifndef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)
#else
#define NFDEBUG(format, args...)  printk(format , ## args)
#endif
| 42 | |
| 43 | /* Sockopts only registered and called from user context, so |
| 44 | net locking would be overkill. Also, [gs]etsockopt calls may |
| 45 | sleep. */ |
| 46 | static DECLARE_MUTEX(nf_sockopt_mutex); |
| 47 | |
| 48 | struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; |
| 49 | static LIST_HEAD(nf_sockopts); |
| 50 | static DEFINE_SPINLOCK(nf_hook_lock); |
| 51 | |
| 52 | /* |
| 53 | * A queue handler may be registered for each protocol. Each is protected by |
| 54 | * long term mutex. The handler must provide an an outfn() to accept packets |
| 55 | * for queueing and must reinject all packets it receives, no matter what. |
| 56 | */ |
| 57 | static struct nf_queue_handler_t { |
| 58 | nf_queue_outfn_t outfn; |
| 59 | void *data; |
| 60 | } queue_handler[NPROTO]; |
| 61 | static DEFINE_RWLOCK(queue_handler_lock); |
| 62 | |
| 63 | int nf_register_hook(struct nf_hook_ops *reg) |
| 64 | { |
| 65 | struct list_head *i; |
| 66 | |
| 67 | spin_lock_bh(&nf_hook_lock); |
| 68 | list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { |
| 69 | if (reg->priority < ((struct nf_hook_ops *)i)->priority) |
| 70 | break; |
| 71 | } |
| 72 | list_add_rcu(®->list, i->prev); |
| 73 | spin_unlock_bh(&nf_hook_lock); |
| 74 | |
| 75 | synchronize_net(); |
| 76 | return 0; |
| 77 | } |
| 78 | |
| 79 | void nf_unregister_hook(struct nf_hook_ops *reg) |
| 80 | { |
| 81 | spin_lock_bh(&nf_hook_lock); |
| 82 | list_del_rcu(®->list); |
| 83 | spin_unlock_bh(&nf_hook_lock); |
| 84 | |
| 85 | synchronize_net(); |
| 86 | } |
| 87 | |
/*
 * Do exclusive ranges overlap?
 *
 * Each range is [min, max): min is included, max is not.  Returns 1 when
 * the two ranges share at least one value, 0 otherwise; ranges that merely
 * touch (max1 == min2 or max2 == min1) do not overlap.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
| 93 | |
| 94 | /* Functions to register sockopt ranges (exclusive). */ |
| 95 | int nf_register_sockopt(struct nf_sockopt_ops *reg) |
| 96 | { |
| 97 | struct list_head *i; |
| 98 | int ret = 0; |
| 99 | |
| 100 | if (down_interruptible(&nf_sockopt_mutex) != 0) |
| 101 | return -EINTR; |
| 102 | |
| 103 | list_for_each(i, &nf_sockopts) { |
| 104 | struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; |
| 105 | if (ops->pf == reg->pf |
| 106 | && (overlap(ops->set_optmin, ops->set_optmax, |
| 107 | reg->set_optmin, reg->set_optmax) |
| 108 | || overlap(ops->get_optmin, ops->get_optmax, |
| 109 | reg->get_optmin, reg->get_optmax))) { |
| 110 | NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", |
| 111 | ops->set_optmin, ops->set_optmax, |
| 112 | ops->get_optmin, ops->get_optmax, |
| 113 | reg->set_optmin, reg->set_optmax, |
| 114 | reg->get_optmin, reg->get_optmax); |
| 115 | ret = -EBUSY; |
| 116 | goto out; |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | list_add(®->list, &nf_sockopts); |
| 121 | out: |
| 122 | up(&nf_sockopt_mutex); |
| 123 | return ret; |
| 124 | } |
| 125 | |
| 126 | void nf_unregister_sockopt(struct nf_sockopt_ops *reg) |
| 127 | { |
| 128 | /* No point being interruptible: we're probably in cleanup_module() */ |
| 129 | restart: |
| 130 | down(&nf_sockopt_mutex); |
| 131 | if (reg->use != 0) { |
| 132 | /* To be woken by nf_sockopt call... */ |
| 133 | /* FIXME: Stuart Young's name appears gratuitously. */ |
| 134 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 135 | reg->cleanup_task = current; |
| 136 | up(&nf_sockopt_mutex); |
| 137 | schedule(); |
| 138 | goto restart; |
| 139 | } |
| 140 | list_del(®->list); |
| 141 | up(&nf_sockopt_mutex); |
| 142 | } |
| 143 | |
| 144 | #ifdef CONFIG_NETFILTER_DEBUG |
| 145 | #include <net/ip.h> |
| 146 | #include <net/tcp.h> |
| 147 | #include <linux/netfilter_ipv4.h> |
| 148 | |
| 149 | static void debug_print_hooks_ip(unsigned int nf_debug) |
| 150 | { |
| 151 | if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { |
| 152 | printk("PRE_ROUTING "); |
| 153 | nf_debug ^= (1 << NF_IP_PRE_ROUTING); |
| 154 | } |
| 155 | if (nf_debug & (1 << NF_IP_LOCAL_IN)) { |
| 156 | printk("LOCAL_IN "); |
| 157 | nf_debug ^= (1 << NF_IP_LOCAL_IN); |
| 158 | } |
| 159 | if (nf_debug & (1 << NF_IP_FORWARD)) { |
| 160 | printk("FORWARD "); |
| 161 | nf_debug ^= (1 << NF_IP_FORWARD); |
| 162 | } |
| 163 | if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { |
| 164 | printk("LOCAL_OUT "); |
| 165 | nf_debug ^= (1 << NF_IP_LOCAL_OUT); |
| 166 | } |
| 167 | if (nf_debug & (1 << NF_IP_POST_ROUTING)) { |
| 168 | printk("POST_ROUTING "); |
| 169 | nf_debug ^= (1 << NF_IP_POST_ROUTING); |
| 170 | } |
| 171 | if (nf_debug) |
| 172 | printk("Crap bits: 0x%04X", nf_debug); |
| 173 | printk("\n"); |
| 174 | } |
| 175 | |
| 176 | static void nf_dump_skb(int pf, struct sk_buff *skb) |
| 177 | { |
| 178 | printk("skb: pf=%i %s dev=%s len=%u\n", |
| 179 | pf, |
| 180 | skb->sk ? "(owned)" : "(unowned)", |
| 181 | skb->dev ? skb->dev->name : "(no dev)", |
| 182 | skb->len); |
| 183 | switch (pf) { |
| 184 | case PF_INET: { |
| 185 | const struct iphdr *ip = skb->nh.iph; |
| 186 | __u32 *opt = (__u32 *) (ip + 1); |
| 187 | int opti; |
| 188 | __u16 src_port = 0, dst_port = 0; |
| 189 | |
| 190 | if (ip->protocol == IPPROTO_TCP |
| 191 | || ip->protocol == IPPROTO_UDP) { |
| 192 | struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); |
| 193 | src_port = ntohs(tcp->source); |
| 194 | dst_port = ntohs(tcp->dest); |
| 195 | } |
| 196 | |
| 197 | printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" |
| 198 | " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", |
| 199 | ip->protocol, NIPQUAD(ip->saddr), |
| 200 | src_port, NIPQUAD(ip->daddr), |
| 201 | dst_port, |
| 202 | ntohs(ip->tot_len), ip->tos, ntohs(ip->id), |
| 203 | ntohs(ip->frag_off), ip->ttl); |
| 204 | |
| 205 | for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) |
| 206 | printk(" O=0x%8.8X", *opt++); |
| 207 | printk("\n"); |
| 208 | } |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | void nf_debug_ip_local_deliver(struct sk_buff *skb) |
| 213 | { |
| 214 | /* If it's a loopback packet, it must have come through |
| 215 | * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and |
| 216 | * NF_IP_LOCAL_IN. Otherwise, must have gone through |
| 217 | * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ |
| 218 | if (!skb->dev) { |
| 219 | printk("ip_local_deliver: skb->dev is NULL.\n"); |
| 220 | } |
| 221 | else if (strcmp(skb->dev->name, "lo") == 0) { |
| 222 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| 223 | | (1 << NF_IP_POST_ROUTING) |
| 224 | | (1 << NF_IP_PRE_ROUTING) |
| 225 | | (1 << NF_IP_LOCAL_IN))) { |
| 226 | printk("ip_local_deliver: bad loopback skb: "); |
| 227 | debug_print_hooks_ip(skb->nf_debug); |
| 228 | nf_dump_skb(PF_INET, skb); |
| 229 | } |
| 230 | } |
| 231 | else { |
| 232 | if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING) |
| 233 | | (1<<NF_IP_LOCAL_IN))) { |
| 234 | printk("ip_local_deliver: bad non-lo skb: "); |
| 235 | debug_print_hooks_ip(skb->nf_debug); |
| 236 | nf_dump_skb(PF_INET, skb); |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) |
| 242 | { |
| 243 | if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| 244 | | (1 << NF_IP_POST_ROUTING))) { |
| 245 | printk("ip_dev_loopback_xmit: bad owned skb = %p: ", |
| 246 | newskb); |
| 247 | debug_print_hooks_ip(newskb->nf_debug); |
| 248 | nf_dump_skb(PF_INET, newskb); |
| 249 | } |
| 250 | /* Clear to avoid confusing input check */ |
| 251 | newskb->nf_debug = 0; |
| 252 | } |
| 253 | |
| 254 | void nf_debug_ip_finish_output2(struct sk_buff *skb) |
| 255 | { |
| 256 | /* If it's owned, it must have gone through the |
| 257 | * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. |
| 258 | * Otherwise, must have gone through |
| 259 | * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. |
| 260 | */ |
| 261 | if (skb->sk) { |
| 262 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| 263 | | (1 << NF_IP_POST_ROUTING))) { |
| 264 | printk("ip_finish_output: bad owned skb = %p: ", skb); |
| 265 | debug_print_hooks_ip(skb->nf_debug); |
| 266 | nf_dump_skb(PF_INET, skb); |
| 267 | } |
| 268 | } else { |
| 269 | if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) |
| 270 | | (1 << NF_IP_FORWARD) |
| 271 | | (1 << NF_IP_POST_ROUTING))) { |
| 272 | /* Fragments, entunnelled packets, TCP RSTs |
| 273 | generated by ipt_REJECT will have no |
| 274 | owners, but still may be local */ |
| 275 | if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) |
| 276 | | (1 << NF_IP_POST_ROUTING))){ |
| 277 | printk("ip_finish_output:" |
| 278 | " bad unowned skb = %p: ",skb); |
| 279 | debug_print_hooks_ip(skb->nf_debug); |
| 280 | nf_dump_skb(PF_INET, skb); |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | } |
| 285 | #endif /*CONFIG_NETFILTER_DEBUG*/ |
| 286 | |
| 287 | /* Call get/setsockopt() */ |
| 288 | static int nf_sockopt(struct sock *sk, int pf, int val, |
| 289 | char __user *opt, int *len, int get) |
| 290 | { |
| 291 | struct list_head *i; |
| 292 | struct nf_sockopt_ops *ops; |
| 293 | int ret; |
| 294 | |
| 295 | if (down_interruptible(&nf_sockopt_mutex) != 0) |
| 296 | return -EINTR; |
| 297 | |
| 298 | list_for_each(i, &nf_sockopts) { |
| 299 | ops = (struct nf_sockopt_ops *)i; |
| 300 | if (ops->pf == pf) { |
| 301 | if (get) { |
| 302 | if (val >= ops->get_optmin |
| 303 | && val < ops->get_optmax) { |
| 304 | ops->use++; |
| 305 | up(&nf_sockopt_mutex); |
| 306 | ret = ops->get(sk, val, opt, len); |
| 307 | goto out; |
| 308 | } |
| 309 | } else { |
| 310 | if (val >= ops->set_optmin |
| 311 | && val < ops->set_optmax) { |
| 312 | ops->use++; |
| 313 | up(&nf_sockopt_mutex); |
| 314 | ret = ops->set(sk, val, opt, *len); |
| 315 | goto out; |
| 316 | } |
| 317 | } |
| 318 | } |
| 319 | } |
| 320 | up(&nf_sockopt_mutex); |
| 321 | return -ENOPROTOOPT; |
| 322 | |
| 323 | out: |
| 324 | down(&nf_sockopt_mutex); |
| 325 | ops->use--; |
| 326 | if (ops->cleanup_task) |
| 327 | wake_up_process(ops->cleanup_task); |
| 328 | up(&nf_sockopt_mutex); |
| 329 | return ret; |
| 330 | } |
| 331 | |
| 332 | int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, |
| 333 | int len) |
| 334 | { |
| 335 | return nf_sockopt(sk, pf, val, opt, &len, 0); |
| 336 | } |
| 337 | |
| 338 | int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) |
| 339 | { |
| 340 | return nf_sockopt(sk, pf, val, opt, len, 1); |
| 341 | } |
| 342 | |
| 343 | static unsigned int nf_iterate(struct list_head *head, |
| 344 | struct sk_buff **skb, |
| 345 | int hook, |
| 346 | const struct net_device *indev, |
| 347 | const struct net_device *outdev, |
| 348 | struct list_head **i, |
| 349 | int (*okfn)(struct sk_buff *), |
| 350 | int hook_thresh) |
| 351 | { |
| 352 | unsigned int verdict; |
| 353 | |
| 354 | /* |
| 355 | * The caller must not block between calls to this |
| 356 | * function because of risk of continuing from deleted element. |
| 357 | */ |
| 358 | list_for_each_continue_rcu(*i, head) { |
| 359 | struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; |
| 360 | |
| 361 | if (hook_thresh > elem->priority) |
| 362 | continue; |
| 363 | |
| 364 | /* Optimization: we don't need to hold module |
| 365 | reference here, since function can't sleep. --RR */ |
| 366 | verdict = elem->hook(hook, skb, indev, outdev, okfn); |
| 367 | if (verdict != NF_ACCEPT) { |
| 368 | #ifdef CONFIG_NETFILTER_DEBUG |
| 369 | if (unlikely(verdict > NF_MAX_VERDICT)) { |
| 370 | NFDEBUG("Evil return from %p(%u).\n", |
| 371 | elem->hook, hook); |
| 372 | continue; |
| 373 | } |
| 374 | #endif |
| 375 | if (verdict != NF_REPEAT) |
| 376 | return verdict; |
| 377 | *i = (*i)->prev; |
| 378 | } |
| 379 | } |
| 380 | return NF_ACCEPT; |
| 381 | } |
| 382 | |
| 383 | int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) |
| 384 | { |
| 385 | int ret; |
| 386 | |
| 387 | write_lock_bh(&queue_handler_lock); |
| 388 | if (queue_handler[pf].outfn) |
| 389 | ret = -EBUSY; |
| 390 | else { |
| 391 | queue_handler[pf].outfn = outfn; |
| 392 | queue_handler[pf].data = data; |
| 393 | ret = 0; |
| 394 | } |
| 395 | write_unlock_bh(&queue_handler_lock); |
| 396 | |
| 397 | return ret; |
| 398 | } |
| 399 | |
| 400 | /* The caller must flush their queue before this */ |
| 401 | int nf_unregister_queue_handler(int pf) |
| 402 | { |
| 403 | write_lock_bh(&queue_handler_lock); |
| 404 | queue_handler[pf].outfn = NULL; |
| 405 | queue_handler[pf].data = NULL; |
| 406 | write_unlock_bh(&queue_handler_lock); |
| 407 | |
| 408 | return 0; |
| 409 | } |
| 410 | |
| 411 | /* |
| 412 | * Any packet that leaves via this function must come back |
| 413 | * through nf_reinject(). |
| 414 | */ |
| 415 | static int nf_queue(struct sk_buff *skb, |
| 416 | struct list_head *elem, |
| 417 | int pf, unsigned int hook, |
| 418 | struct net_device *indev, |
| 419 | struct net_device *outdev, |
| 420 | int (*okfn)(struct sk_buff *)) |
| 421 | { |
| 422 | int status; |
| 423 | struct nf_info *info; |
| 424 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 425 | struct net_device *physindev = NULL; |
| 426 | struct net_device *physoutdev = NULL; |
| 427 | #endif |
| 428 | |
| 429 | /* QUEUE == DROP if noone is waiting, to be safe. */ |
| 430 | read_lock(&queue_handler_lock); |
| 431 | if (!queue_handler[pf].outfn) { |
| 432 | read_unlock(&queue_handler_lock); |
| 433 | kfree_skb(skb); |
| 434 | return 1; |
| 435 | } |
| 436 | |
| 437 | info = kmalloc(sizeof(*info), GFP_ATOMIC); |
| 438 | if (!info) { |
| 439 | if (net_ratelimit()) |
| 440 | printk(KERN_ERR "OOM queueing packet %p\n", |
| 441 | skb); |
| 442 | read_unlock(&queue_handler_lock); |
| 443 | kfree_skb(skb); |
| 444 | return 1; |
| 445 | } |
| 446 | |
| 447 | *info = (struct nf_info) { |
| 448 | (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; |
| 449 | |
| 450 | /* If it's going away, ignore hook. */ |
| 451 | if (!try_module_get(info->elem->owner)) { |
| 452 | read_unlock(&queue_handler_lock); |
| 453 | kfree(info); |
| 454 | return 0; |
| 455 | } |
| 456 | |
| 457 | /* Bump dev refs so they don't vanish while packet is out */ |
| 458 | if (indev) dev_hold(indev); |
| 459 | if (outdev) dev_hold(outdev); |
| 460 | |
| 461 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 462 | if (skb->nf_bridge) { |
| 463 | physindev = skb->nf_bridge->physindev; |
| 464 | if (physindev) dev_hold(physindev); |
| 465 | physoutdev = skb->nf_bridge->physoutdev; |
| 466 | if (physoutdev) dev_hold(physoutdev); |
| 467 | } |
| 468 | #endif |
| 469 | |
| 470 | status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); |
| 471 | read_unlock(&queue_handler_lock); |
| 472 | |
| 473 | if (status < 0) { |
| 474 | /* James M doesn't say fuck enough. */ |
| 475 | if (indev) dev_put(indev); |
| 476 | if (outdev) dev_put(outdev); |
| 477 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 478 | if (physindev) dev_put(physindev); |
| 479 | if (physoutdev) dev_put(physoutdev); |
| 480 | #endif |
| 481 | module_put(info->elem->owner); |
| 482 | kfree(info); |
| 483 | kfree_skb(skb); |
| 484 | return 1; |
| 485 | } |
| 486 | return 1; |
| 487 | } |
| 488 | |
| 489 | /* Returns 1 if okfn() needs to be executed by the caller, |
| 490 | * -EPERM for NF_DROP, 0 otherwise. */ |
| 491 | int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, |
| 492 | struct net_device *indev, |
| 493 | struct net_device *outdev, |
| 494 | int (*okfn)(struct sk_buff *), |
| 495 | int hook_thresh) |
| 496 | { |
| 497 | struct list_head *elem; |
| 498 | unsigned int verdict; |
| 499 | int ret = 0; |
| 500 | |
| 501 | /* We may already have this, but read-locks nest anyway */ |
| 502 | rcu_read_lock(); |
| 503 | |
| 504 | #ifdef CONFIG_NETFILTER_DEBUG |
| 505 | if (unlikely((*pskb)->nf_debug & (1 << hook))) { |
| 506 | printk("nf_hook: hook %i already set.\n", hook); |
| 507 | nf_dump_skb(pf, *pskb); |
| 508 | } |
| 509 | (*pskb)->nf_debug |= (1 << hook); |
| 510 | #endif |
| 511 | |
| 512 | elem = &nf_hooks[pf][hook]; |
| 513 | next_hook: |
| 514 | verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, |
| 515 | outdev, &elem, okfn, hook_thresh); |
| 516 | if (verdict == NF_ACCEPT || verdict == NF_STOP) { |
| 517 | ret = 1; |
| 518 | goto unlock; |
| 519 | } else if (verdict == NF_DROP) { |
| 520 | kfree_skb(*pskb); |
| 521 | ret = -EPERM; |
| 522 | } else if (verdict == NF_QUEUE) { |
| 523 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); |
| 524 | if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) |
| 525 | goto next_hook; |
| 526 | } |
| 527 | unlock: |
| 528 | rcu_read_unlock(); |
| 529 | return ret; |
| 530 | } |
| 531 | |
| 532 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, |
| 533 | unsigned int verdict) |
| 534 | { |
| 535 | struct list_head *elem = &info->elem->list; |
| 536 | struct list_head *i; |
| 537 | |
| 538 | rcu_read_lock(); |
| 539 | |
| 540 | /* Release those devices we held, or Alexey will kill me. */ |
| 541 | if (info->indev) dev_put(info->indev); |
| 542 | if (info->outdev) dev_put(info->outdev); |
| 543 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 544 | if (skb->nf_bridge) { |
| 545 | if (skb->nf_bridge->physindev) |
| 546 | dev_put(skb->nf_bridge->physindev); |
| 547 | if (skb->nf_bridge->physoutdev) |
| 548 | dev_put(skb->nf_bridge->physoutdev); |
| 549 | } |
| 550 | #endif |
| 551 | |
| 552 | /* Drop reference to owner of hook which queued us. */ |
| 553 | module_put(info->elem->owner); |
| 554 | |
| 555 | list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { |
| 556 | if (i == elem) |
| 557 | break; |
| 558 | } |
| 559 | |
| 560 | if (elem == &nf_hooks[info->pf][info->hook]) { |
| 561 | /* The module which sent it to userspace is gone. */ |
| 562 | NFDEBUG("%s: module disappeared, dropping packet.\n", |
| 563 | __FUNCTION__); |
| 564 | verdict = NF_DROP; |
| 565 | } |
| 566 | |
| 567 | /* Continue traversal iff userspace said ok... */ |
| 568 | if (verdict == NF_REPEAT) { |
| 569 | elem = elem->prev; |
| 570 | verdict = NF_ACCEPT; |
| 571 | } |
| 572 | |
| 573 | if (verdict == NF_ACCEPT) { |
| 574 | next_hook: |
| 575 | verdict = nf_iterate(&nf_hooks[info->pf][info->hook], |
| 576 | &skb, info->hook, |
| 577 | info->indev, info->outdev, &elem, |
| 578 | info->okfn, INT_MIN); |
| 579 | } |
| 580 | |
| 581 | switch (verdict) { |
| 582 | case NF_ACCEPT: |
| 583 | info->okfn(skb); |
| 584 | break; |
| 585 | |
| 586 | case NF_QUEUE: |
| 587 | if (!nf_queue(skb, elem, info->pf, info->hook, |
| 588 | info->indev, info->outdev, info->okfn)) |
| 589 | goto next_hook; |
| 590 | break; |
| 591 | } |
| 592 | rcu_read_unlock(); |
| 593 | |
| 594 | if (verdict == NF_DROP) |
| 595 | kfree_skb(skb); |
| 596 | |
| 597 | kfree(info); |
| 598 | return; |
| 599 | } |
| 600 | |
| 601 | #ifdef CONFIG_INET |
| 602 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
| 603 | int ip_route_me_harder(struct sk_buff **pskb) |
| 604 | { |
| 605 | struct iphdr *iph = (*pskb)->nh.iph; |
| 606 | struct rtable *rt; |
| 607 | struct flowi fl = {}; |
| 608 | struct dst_entry *odst; |
| 609 | unsigned int hh_len; |
| 610 | |
| 611 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause |
| 612 | * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. |
| 613 | */ |
| 614 | if (inet_addr_type(iph->saddr) == RTN_LOCAL) { |
| 615 | fl.nl_u.ip4_u.daddr = iph->daddr; |
| 616 | fl.nl_u.ip4_u.saddr = iph->saddr; |
| 617 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); |
| 618 | fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; |
| 619 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 620 | fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; |
| 621 | #endif |
| 622 | fl.proto = iph->protocol; |
| 623 | if (ip_route_output_key(&rt, &fl) != 0) |
| 624 | return -1; |
| 625 | |
| 626 | /* Drop old route. */ |
| 627 | dst_release((*pskb)->dst); |
| 628 | (*pskb)->dst = &rt->u.dst; |
| 629 | } else { |
| 630 | /* non-local src, find valid iif to satisfy |
| 631 | * rp-filter when calling ip_route_input. */ |
| 632 | fl.nl_u.ip4_u.daddr = iph->saddr; |
| 633 | if (ip_route_output_key(&rt, &fl) != 0) |
| 634 | return -1; |
| 635 | |
| 636 | odst = (*pskb)->dst; |
| 637 | if (ip_route_input(*pskb, iph->daddr, iph->saddr, |
| 638 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { |
| 639 | dst_release(&rt->u.dst); |
| 640 | return -1; |
| 641 | } |
| 642 | dst_release(&rt->u.dst); |
| 643 | dst_release(odst); |
| 644 | } |
| 645 | |
| 646 | if ((*pskb)->dst->error) |
| 647 | return -1; |
| 648 | |
| 649 | /* Change in oif may mean change in hh_len. */ |
| 650 | hh_len = (*pskb)->dst->dev->hard_header_len; |
| 651 | if (skb_headroom(*pskb) < hh_len) { |
| 652 | struct sk_buff *nskb; |
| 653 | |
| 654 | nskb = skb_realloc_headroom(*pskb, hh_len); |
| 655 | if (!nskb) |
| 656 | return -1; |
| 657 | if ((*pskb)->sk) |
| 658 | skb_set_owner_w(nskb, (*pskb)->sk); |
| 659 | kfree_skb(*pskb); |
| 660 | *pskb = nskb; |
| 661 | } |
| 662 | |
| 663 | return 0; |
| 664 | } |
| 665 | EXPORT_SYMBOL(ip_route_me_harder); |
| 666 | |
| 667 | int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) |
| 668 | { |
| 669 | struct sk_buff *nskb; |
| 670 | |
| 671 | if (writable_len > (*pskb)->len) |
| 672 | return 0; |
| 673 | |
| 674 | /* Not exclusive use of packet? Must copy. */ |
| 675 | if (skb_shared(*pskb) || skb_cloned(*pskb)) |
| 676 | goto copy_skb; |
| 677 | |
| 678 | return pskb_may_pull(*pskb, writable_len); |
| 679 | |
| 680 | copy_skb: |
| 681 | nskb = skb_copy(*pskb, GFP_ATOMIC); |
| 682 | if (!nskb) |
| 683 | return 0; |
| 684 | BUG_ON(skb_is_nonlinear(nskb)); |
| 685 | |
| 686 | /* Rest of kernel will get very unhappy if we pass it a |
| 687 | suddenly-orphaned skbuff */ |
| 688 | if ((*pskb)->sk) |
| 689 | skb_set_owner_w(nskb, (*pskb)->sk); |
| 690 | kfree_skb(*pskb); |
| 691 | *pskb = nskb; |
| 692 | return 1; |
| 693 | } |
| 694 | EXPORT_SYMBOL(skb_ip_make_writable); |
| 695 | #endif /*CONFIG_INET*/ |
| 696 | |
| 697 | /* Internal logging interface, which relies on the real |
| 698 | LOG target modules */ |
| 699 | |
| 700 | #define NF_LOG_PREFIXLEN 128 |
| 701 | |
| 702 | static nf_logfn *nf_logging[NPROTO]; /* = NULL */ |
| 703 | static int reported = 0; |
| 704 | static DEFINE_SPINLOCK(nf_log_lock); |
| 705 | |
| 706 | int nf_log_register(int pf, nf_logfn *logfn) |
| 707 | { |
| 708 | int ret = -EBUSY; |
| 709 | |
| 710 | /* Any setup of logging members must be done before |
| 711 | * substituting pointer. */ |
| 712 | spin_lock(&nf_log_lock); |
| 713 | if (!nf_logging[pf]) { |
| 714 | rcu_assign_pointer(nf_logging[pf], logfn); |
| 715 | ret = 0; |
| 716 | } |
| 717 | spin_unlock(&nf_log_lock); |
| 718 | return ret; |
| 719 | } |
| 720 | |
| 721 | void nf_log_unregister(int pf, nf_logfn *logfn) |
| 722 | { |
| 723 | spin_lock(&nf_log_lock); |
| 724 | if (nf_logging[pf] == logfn) |
| 725 | nf_logging[pf] = NULL; |
| 726 | spin_unlock(&nf_log_lock); |
| 727 | |
| 728 | /* Give time to concurrent readers. */ |
| 729 | synchronize_net(); |
| 730 | } |
| 731 | |
| 732 | void nf_log_packet(int pf, |
| 733 | unsigned int hooknum, |
| 734 | const struct sk_buff *skb, |
| 735 | const struct net_device *in, |
| 736 | const struct net_device *out, |
| 737 | const char *fmt, ...) |
| 738 | { |
| 739 | va_list args; |
| 740 | char prefix[NF_LOG_PREFIXLEN]; |
| 741 | nf_logfn *logfn; |
| 742 | |
| 743 | rcu_read_lock(); |
| 744 | logfn = rcu_dereference(nf_logging[pf]); |
| 745 | if (logfn) { |
| 746 | va_start(args, fmt); |
| 747 | vsnprintf(prefix, sizeof(prefix), fmt, args); |
| 748 | va_end(args); |
| 749 | /* We must read logging before nf_logfn[pf] */ |
| 750 | logfn(hooknum, skb, in, out, prefix); |
| 751 | } else if (!reported) { |
| 752 | printk(KERN_WARNING "nf_log_packet: can\'t log yet, " |
| 753 | "no backend logging module loaded in!\n"); |
| 754 | reported++; |
| 755 | } |
| 756 | rcu_read_unlock(); |
| 757 | } |
/* Logging interface exported to modules. */
EXPORT_SYMBOL(nf_log_packet);
EXPORT_SYMBOL(nf_log_register);
EXPORT_SYMBOL(nf_log_unregister);
| 761 | |
/*
 * This does not belong here, but locally generated errors need it if
 * connection tracking is in use: without this, the connection may not be
 * in the hash table, and hence manufactured ICMP or RST packets will not
 * be associated with it.
 */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
| 766 | |
| 767 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) |
| 768 | { |
| 769 | void (*attach)(struct sk_buff *, struct sk_buff *); |
| 770 | |
| 771 | if (skb->nfct && (attach = ip_ct_attach) != NULL) { |
| 772 | mb(); /* Just to be sure: must be read before executing this */ |
| 773 | attach(new, skb); |
| 774 | } |
| 775 | } |
| 776 | |
| 777 | void __init netfilter_init(void) |
| 778 | { |
| 779 | int i, h; |
| 780 | |
| 781 | for (i = 0; i < NPROTO; i++) { |
| 782 | for (h = 0; h < NF_MAX_HOOKS; h++) |
| 783 | INIT_LIST_HEAD(&nf_hooks[i][h]); |
| 784 | } |
| 785 | } |
| 786 | |
/* Conntrack attach helper. */
EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);

/* Hook tables, registration and traversal. */
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_hook_slow);

/* Packet queueing. */
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL(nf_reinject);

/* Socket options. */
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_unregister_sockopt);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_setsockopt);