Jozsef Kadlecsik | 5b1158e | 2006-12-02 22:07:13 -0800 | [diff] [blame^] | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
| 2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License version 2 as |
| 6 | * published by the Free Software Foundation. |
| 7 | */ |
| 8 | #include <linux/types.h> |
| 9 | #include <linux/icmp.h> |
| 10 | #include <linux/ip.h> |
| 11 | #include <linux/netfilter.h> |
| 12 | #include <linux/netfilter_ipv4.h> |
| 13 | #include <linux/module.h> |
| 14 | #include <linux/skbuff.h> |
| 15 | #include <linux/proc_fs.h> |
| 16 | #include <net/ip.h> |
| 17 | #include <net/checksum.h> |
| 18 | #include <linux/spinlock.h> |
| 19 | |
| 20 | #include <net/netfilter/nf_conntrack.h> |
| 21 | #include <net/netfilter/nf_conntrack_core.h> |
| 22 | #include <net/netfilter/nf_nat.h> |
| 23 | #include <net/netfilter/nf_nat_rule.h> |
| 24 | #include <net/netfilter/nf_nat_protocol.h> |
| 25 | #include <net/netfilter/nf_nat_core.h> |
| 26 | #include <net/netfilter/nf_nat_helper.h> |
| 27 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 28 | |
/* Debug output is compiled out; change the condition to 1 to route
 * DEBUGP() to printk. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

/* Map a netfilter IPv4 hook number to a printable name.  Any value other
 * than the four hooks listed below yields "*ERROR*". */
#define HOOKNAME(hooknum)					\
	((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" :	\
	 (hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" :	\
	 (hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" :		\
	 (hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" :		\
	 "*ERROR*")
| 40 | |
#ifdef CONFIG_XFRM
/*
 * Patch the addresses (and, for TCP/UDP, the ports) of flow key `fl' using
 * the conntrack tuple that matches the direction of `skb'.
 *
 * The destination side is rewritten when the connection carries the NAT
 * status bit selected for this direction (IPS_DST_NAT for the original
 * direction, IPS_SRC_NAT otherwise); the source side is rewritten when the
 * opposite bit is set.  Nothing is touched if the skb has no conntrack.
 */
static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	const struct nf_conntrack_tuple *tuple;
	unsigned long dst_bit, src_bit;
	struct nf_conn *ct;
	int has_ports;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return;

	dir = CTINFO2DIR(ctinfo);
	tuple = &ct->tuplehash[dir].tuple;

	dst_bit = (dir == IP_CT_DIR_ORIGINAL) ? IPS_DST_NAT : IPS_SRC_NAT;
	/* Flipping both NAT bits selects the other manip's status bit. */
	src_bit = dst_bit ^ IPS_NAT_MASK;

	/* Ports are only copied for TCP and UDP. */
	has_ports = tuple->dst.protonum == IPPROTO_TCP ||
		    tuple->dst.protonum == IPPROTO_UDP;

	if (ct->status & dst_bit) {
		fl->fl4_dst = tuple->dst.u3.ip;
		if (has_ports)
			fl->fl_ip_dport = tuple->dst.u.tcp.port;
	}

	if (ct->status & src_bit) {
		fl->fl4_src = tuple->src.u3.ip;
		if (has_ports)
			fl->fl_ip_sport = tuple->src.u.tcp.port;
	}
}
#endif
| 78 | |
| 79 | static unsigned int |
| 80 | nf_nat_fn(unsigned int hooknum, |
| 81 | struct sk_buff **pskb, |
| 82 | const struct net_device *in, |
| 83 | const struct net_device *out, |
| 84 | int (*okfn)(struct sk_buff *)) |
| 85 | { |
| 86 | struct nf_conn *ct; |
| 87 | enum ip_conntrack_info ctinfo; |
| 88 | struct nf_conn_nat *nat; |
| 89 | struct nf_nat_info *info; |
| 90 | /* maniptype == SRC for postrouting. */ |
| 91 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); |
| 92 | |
| 93 | /* We never see fragments: conntrack defrags on pre-routing |
| 94 | and local-out, and nf_nat_out protects post-routing. */ |
| 95 | NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off |
| 96 | & htons(IP_MF|IP_OFFSET))); |
| 97 | |
| 98 | ct = nf_ct_get(*pskb, &ctinfo); |
| 99 | /* Can't track? It's not due to stress, or conntrack would |
| 100 | have dropped it. Hence it's the user's responsibilty to |
| 101 | packet filter it out, or implement conntrack/NAT for that |
| 102 | protocol. 8) --RR */ |
| 103 | if (!ct) { |
| 104 | /* Exception: ICMP redirect to new connection (not in |
| 105 | hash table yet). We must not let this through, in |
| 106 | case we're doing NAT to the same network. */ |
| 107 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { |
| 108 | struct icmphdr _hdr, *hp; |
| 109 | |
| 110 | hp = skb_header_pointer(*pskb, |
| 111 | (*pskb)->nh.iph->ihl*4, |
| 112 | sizeof(_hdr), &_hdr); |
| 113 | if (hp != NULL && |
| 114 | hp->type == ICMP_REDIRECT) |
| 115 | return NF_DROP; |
| 116 | } |
| 117 | return NF_ACCEPT; |
| 118 | } |
| 119 | |
| 120 | /* Don't try to NAT if this packet is not conntracked */ |
| 121 | if (ct == &nf_conntrack_untracked) |
| 122 | return NF_ACCEPT; |
| 123 | |
| 124 | nat = nfct_nat(ct); |
| 125 | if (!nat) |
| 126 | return NF_DROP; |
| 127 | |
| 128 | switch (ctinfo) { |
| 129 | case IP_CT_RELATED: |
| 130 | case IP_CT_RELATED+IP_CT_IS_REPLY: |
| 131 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { |
| 132 | if (!nf_nat_icmp_reply_translation(ct, ctinfo, |
| 133 | hooknum, pskb)) |
| 134 | return NF_DROP; |
| 135 | else |
| 136 | return NF_ACCEPT; |
| 137 | } |
| 138 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ |
| 139 | case IP_CT_NEW: |
| 140 | info = &nat->info; |
| 141 | |
| 142 | /* Seen it before? This can happen for loopback, retrans, |
| 143 | or local packets.. */ |
| 144 | if (!nf_nat_initialized(ct, maniptype)) { |
| 145 | unsigned int ret; |
| 146 | |
| 147 | if (unlikely(nf_ct_is_confirmed(ct))) |
| 148 | /* NAT module was loaded late */ |
| 149 | ret = alloc_null_binding_confirmed(ct, info, |
| 150 | hooknum); |
| 151 | else if (hooknum == NF_IP_LOCAL_IN) |
| 152 | /* LOCAL_IN hook doesn't have a chain! */ |
| 153 | ret = alloc_null_binding(ct, info, hooknum); |
| 154 | else |
| 155 | ret = nf_nat_rule_find(pskb, hooknum, in, out, |
| 156 | ct, info); |
| 157 | |
| 158 | if (ret != NF_ACCEPT) { |
| 159 | return ret; |
| 160 | } |
| 161 | } else |
| 162 | DEBUGP("Already setup manip %s for ct %p\n", |
| 163 | maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", |
| 164 | ct); |
| 165 | break; |
| 166 | |
| 167 | default: |
| 168 | /* ESTABLISHED */ |
| 169 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || |
| 170 | ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); |
| 171 | info = &nat->info; |
| 172 | } |
| 173 | |
| 174 | NF_CT_ASSERT(info); |
| 175 | return nf_nat_packet(ct, ctinfo, hooknum, pskb); |
| 176 | } |
| 177 | |
| 178 | static unsigned int |
| 179 | nf_nat_in(unsigned int hooknum, |
| 180 | struct sk_buff **pskb, |
| 181 | const struct net_device *in, |
| 182 | const struct net_device *out, |
| 183 | int (*okfn)(struct sk_buff *)) |
| 184 | { |
| 185 | unsigned int ret; |
| 186 | __be32 daddr = (*pskb)->nh.iph->daddr; |
| 187 | |
| 188 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
| 189 | if (ret != NF_DROP && ret != NF_STOLEN && |
| 190 | daddr != (*pskb)->nh.iph->daddr) { |
| 191 | dst_release((*pskb)->dst); |
| 192 | (*pskb)->dst = NULL; |
| 193 | } |
| 194 | return ret; |
| 195 | } |
| 196 | |
| 197 | static unsigned int |
| 198 | nf_nat_out(unsigned int hooknum, |
| 199 | struct sk_buff **pskb, |
| 200 | const struct net_device *in, |
| 201 | const struct net_device *out, |
| 202 | int (*okfn)(struct sk_buff *)) |
| 203 | { |
| 204 | #ifdef CONFIG_XFRM |
| 205 | struct nf_conn *ct; |
| 206 | enum ip_conntrack_info ctinfo; |
| 207 | #endif |
| 208 | unsigned int ret; |
| 209 | |
| 210 | /* root is playing with raw sockets. */ |
| 211 | if ((*pskb)->len < sizeof(struct iphdr) || |
| 212 | (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) |
| 213 | return NF_ACCEPT; |
| 214 | |
| 215 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
| 216 | #ifdef CONFIG_XFRM |
| 217 | if (ret != NF_DROP && ret != NF_STOLEN && |
| 218 | (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { |
| 219 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
| 220 | |
| 221 | if (ct->tuplehash[dir].tuple.src.u3.ip != |
| 222 | ct->tuplehash[!dir].tuple.dst.u3.ip |
| 223 | || ct->tuplehash[dir].tuple.src.u.all != |
| 224 | ct->tuplehash[!dir].tuple.dst.u.all |
| 225 | ) |
| 226 | return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; |
| 227 | } |
| 228 | #endif |
| 229 | return ret; |
| 230 | } |
| 231 | |
| 232 | static unsigned int |
| 233 | nf_nat_local_fn(unsigned int hooknum, |
| 234 | struct sk_buff **pskb, |
| 235 | const struct net_device *in, |
| 236 | const struct net_device *out, |
| 237 | int (*okfn)(struct sk_buff *)) |
| 238 | { |
| 239 | struct nf_conn *ct; |
| 240 | enum ip_conntrack_info ctinfo; |
| 241 | unsigned int ret; |
| 242 | |
| 243 | /* root is playing with raw sockets. */ |
| 244 | if ((*pskb)->len < sizeof(struct iphdr) || |
| 245 | (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) |
| 246 | return NF_ACCEPT; |
| 247 | |
| 248 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
| 249 | if (ret != NF_DROP && ret != NF_STOLEN && |
| 250 | (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { |
| 251 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
| 252 | |
| 253 | if (ct->tuplehash[dir].tuple.dst.u3.ip != |
| 254 | ct->tuplehash[!dir].tuple.src.u3.ip |
| 255 | #ifdef CONFIG_XFRM |
| 256 | || ct->tuplehash[dir].tuple.dst.u.all != |
| 257 | ct->tuplehash[!dir].tuple.src.u.all |
| 258 | #endif |
| 259 | ) |
| 260 | if (ip_route_me_harder(pskb, RTN_UNSPEC)) |
| 261 | ret = NF_DROP; |
| 262 | } |
| 263 | return ret; |
| 264 | } |
| 265 | |
| 266 | static unsigned int |
| 267 | nf_nat_adjust(unsigned int hooknum, |
| 268 | struct sk_buff **pskb, |
| 269 | const struct net_device *in, |
| 270 | const struct net_device *out, |
| 271 | int (*okfn)(struct sk_buff *)) |
| 272 | { |
| 273 | struct nf_conn *ct; |
| 274 | enum ip_conntrack_info ctinfo; |
| 275 | |
| 276 | ct = nf_ct_get(*pskb, &ctinfo); |
| 277 | if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { |
| 278 | DEBUGP("nf_nat_standalone: adjusting sequence number\n"); |
| 279 | if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) |
| 280 | return NF_DROP; |
| 281 | } |
| 282 | return NF_ACCEPT; |
| 283 | } |
| 284 | |
| 285 | /* We must be after connection tracking and before packet filtering. */ |
| 286 | |
| 287 | static struct nf_hook_ops nf_nat_ops[] = { |
| 288 | /* Before packet filtering, change destination */ |
| 289 | { |
| 290 | .hook = nf_nat_in, |
| 291 | .owner = THIS_MODULE, |
| 292 | .pf = PF_INET, |
| 293 | .hooknum = NF_IP_PRE_ROUTING, |
| 294 | .priority = NF_IP_PRI_NAT_DST, |
| 295 | }, |
| 296 | /* After packet filtering, change source */ |
| 297 | { |
| 298 | .hook = nf_nat_out, |
| 299 | .owner = THIS_MODULE, |
| 300 | .pf = PF_INET, |
| 301 | .hooknum = NF_IP_POST_ROUTING, |
| 302 | .priority = NF_IP_PRI_NAT_SRC, |
| 303 | }, |
| 304 | /* After conntrack, adjust sequence number */ |
| 305 | { |
| 306 | .hook = nf_nat_adjust, |
| 307 | .owner = THIS_MODULE, |
| 308 | .pf = PF_INET, |
| 309 | .hooknum = NF_IP_POST_ROUTING, |
| 310 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, |
| 311 | }, |
| 312 | /* Before packet filtering, change destination */ |
| 313 | { |
| 314 | .hook = nf_nat_local_fn, |
| 315 | .owner = THIS_MODULE, |
| 316 | .pf = PF_INET, |
| 317 | .hooknum = NF_IP_LOCAL_OUT, |
| 318 | .priority = NF_IP_PRI_NAT_DST, |
| 319 | }, |
| 320 | /* After packet filtering, change source */ |
| 321 | { |
| 322 | .hook = nf_nat_fn, |
| 323 | .owner = THIS_MODULE, |
| 324 | .pf = PF_INET, |
| 325 | .hooknum = NF_IP_LOCAL_IN, |
| 326 | .priority = NF_IP_PRI_NAT_SRC, |
| 327 | }, |
| 328 | /* After conntrack, adjust sequence number */ |
| 329 | { |
| 330 | .hook = nf_nat_adjust, |
| 331 | .owner = THIS_MODULE, |
| 332 | .pf = PF_INET, |
| 333 | .hooknum = NF_IP_LOCAL_IN, |
| 334 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, |
| 335 | }, |
| 336 | }; |
| 337 | |
| 338 | static int __init nf_nat_standalone_init(void) |
| 339 | { |
| 340 | int size, ret = 0; |
| 341 | |
| 342 | need_conntrack(); |
| 343 | |
| 344 | size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) + |
| 345 | sizeof(struct nf_conn_nat); |
| 346 | ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size); |
| 347 | if (ret < 0) { |
| 348 | printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); |
| 349 | return ret; |
| 350 | } |
| 351 | |
| 352 | size = ALIGN(size, __alignof__(struct nf_conn_help)) + |
| 353 | sizeof(struct nf_conn_help); |
| 354 | ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP, |
| 355 | "nf_nat:help", size); |
| 356 | if (ret < 0) { |
| 357 | printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); |
| 358 | goto cleanup_register_cache; |
| 359 | } |
| 360 | #ifdef CONFIG_XFRM |
| 361 | BUG_ON(ip_nat_decode_session != NULL); |
| 362 | ip_nat_decode_session = nat_decode_session; |
| 363 | #endif |
| 364 | ret = nf_nat_rule_init(); |
| 365 | if (ret < 0) { |
| 366 | printk("nf_nat_init: can't setup rules.\n"); |
| 367 | goto cleanup_decode_session; |
| 368 | } |
| 369 | ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); |
| 370 | if (ret < 0) { |
| 371 | printk("nf_nat_init: can't register hooks.\n"); |
| 372 | goto cleanup_rule_init; |
| 373 | } |
| 374 | nf_nat_module_is_loaded = 1; |
| 375 | return ret; |
| 376 | |
| 377 | cleanup_rule_init: |
| 378 | nf_nat_rule_cleanup(); |
| 379 | cleanup_decode_session: |
| 380 | #ifdef CONFIG_XFRM |
| 381 | ip_nat_decode_session = NULL; |
| 382 | synchronize_net(); |
| 383 | #endif |
| 384 | nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP); |
| 385 | cleanup_register_cache: |
| 386 | nf_conntrack_unregister_cache(NF_CT_F_NAT); |
| 387 | return ret; |
| 388 | } |
| 389 | |
| 390 | static void __exit nf_nat_standalone_fini(void) |
| 391 | { |
| 392 | nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); |
| 393 | nf_nat_rule_cleanup(); |
| 394 | nf_nat_module_is_loaded = 0; |
| 395 | #ifdef CONFIG_XFRM |
| 396 | ip_nat_decode_session = NULL; |
| 397 | synchronize_net(); |
| 398 | #endif |
| 399 | /* Conntrack caches are unregistered in nf_conntrack_cleanup */ |
| 400 | } |
| 401 | |
| 402 | module_init(nf_nat_standalone_init); |
| 403 | module_exit(nf_nat_standalone_fini); |
| 404 | |
| 405 | MODULE_LICENSE("GPL"); |
| 406 | MODULE_ALIAS("ip_nat"); |