blob: 1adedb743f609f8b349a545184040d54512f639d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/* NAT for netfilter; shared with compatibility layer. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/netfilter_ipv4.h>
16#include <linux/vmalloc.h>
17#include <net/checksum.h>
18#include <net/icmp.h>
19#include <net/ip.h>
20#include <net/tcp.h> /* For tcp_prot in getorigdst */
21#include <linux/icmp.h>
22#include <linux/udp.h>
23#include <linux/jhash.h>
24
Patrick McHardye45b1be2005-06-21 14:01:30 -070025#define ASSERT_READ_LOCK(x)
26#define ASSERT_WRITE_LOCK(x)
Linus Torvalds1da177e2005-04-16 15:20:36 -070027
28#include <linux/netfilter_ipv4/ip_conntrack.h>
29#include <linux/netfilter_ipv4/ip_conntrack_core.h>
30#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
31#include <linux/netfilter_ipv4/ip_nat.h>
32#include <linux/netfilter_ipv4/ip_nat_protocol.h>
33#include <linux/netfilter_ipv4/ip_nat_core.h>
34#include <linux/netfilter_ipv4/ip_nat_helper.h>
35#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
36#include <linux/netfilter_ipv4/listhelp.h>
37
38#if 0
39#define DEBUGP printk
40#else
41#define DEBUGP(format, args...)
42#endif
43
/* Guards the bysource hash and the ip_nat_protos table below; always
 * taken with BHs disabled since NAT also runs from softirq context. */
DEFINE_RWLOCK(ip_nat_lock);

/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;

/* Hash of conntracks keyed by original source tuple (see hash_by_src);
 * used by find_appropriate_src() to reuse existing SNAT mappings. */
static struct list_head *bysource;

/* One handler slot per IP protocol number; unset slots point at
 * ip_nat_unknown_protocol (see ip_nat_init). */
#define MAX_IP_NAT_PROTO 256
struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
53
/* Raw lookup of the NAT handler for @protonum.  Takes no module
 * reference; caller must prevent concurrent unregistration (e.g. by
 * disabling preemption or holding ip_nat_lock). */
static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum)
{
	return ip_nat_protos[protonum];
}
59
/* Look up the NAT handler for @protonum and grab a module reference on
 * it.  Falls back to ip_nat_unknown_protocol if the module is going
 * away.  Pair with ip_nat_proto_put(). */
struct ip_nat_protocol *
ip_nat_proto_find_get(u_int8_t protonum)
{
	struct ip_nat_protocol *p;

	/* we need to disable preemption to make sure 'p' doesn't get
	 * removed until we've grabbed the reference */
	preempt_disable();
	p = __ip_nat_proto_find(protonum);
	if (p) {
		/* try_module_get() fails while the owning module is
		 * unloading; substitute the always-present fallback. */
		if (!try_module_get(p->me))
			p = &ip_nat_unknown_protocol;
	}
	preempt_enable();

	return p;
}
77
/* Release the module reference taken by ip_nat_proto_find_get(). */
void
ip_nat_proto_put(struct ip_nat_protocol *p)
{
	module_put(p->me);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
/* We keep an extra hash for each conntrack, for fast searching.
 * Buckets on original source IP, source proto part (e.g. port) and
 * protocol number, modulo the table size. */
static inline unsigned int
hash_by_src(const struct ip_conntrack_tuple *tuple)
{
	/* Original src, to ensure we map it consistently if poss. */
	return jhash_3words(tuple->src.ip, tuple->src.u.all,
			    tuple->dst.protonum, 0) % ip_nat_htable_size;
}
92
/* Noone using conntrack by the time this called.  Installed as
 * ip_conntrack_destroyed callback (see ip_nat_init); unhashes the
 * conntrack from the bysource table if it was ever placed there. */
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
{
	/* Never NAT-hashed (see ip_nat_setup_info): nothing to unlink. */
	if (!(conn->status & IPS_NAT_DONE_MASK))
		return;

	write_lock_bh(&ip_nat_lock);
	list_del(&conn->nat.info.bysource);
	write_unlock_bh(&ip_nat_lock);
}
103
/* We do checksum mangling, so if they were wrong before they're still
 * wrong.  Also works for incomplete packets (eg. ICMP dest
 * unreachables.)
 *
 * Incremental checksum update: @oldvalinv is the bitwise inverse of
 * the 32-bit value being replaced, @newval its replacement; returns
 * @oldcheck adjusted for the change. */
u_int16_t
ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
{
	u_int32_t diffs[] = { oldvalinv, newval };
	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
				      oldcheck^0xFFFF));
}
114
/* Is this tuple already taken? (not by us) */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
		  const struct ip_conntrack *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	   incoming ones.  NAT means they don't have a fixed mapping,
	   so we invert the tuple and look for the incoming reply.

	   We could keep a separate hash if this proves too slow. */
	struct ip_conntrack_tuple reply;

	invert_tuplepr(&reply, tuple);
	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
130
131/* If we source map this tuple so reply looks like reply_tuple, will
132 * that meet the constraints of range. */
133static int
134in_range(const struct ip_conntrack_tuple *tuple,
135 const struct ip_nat_range *range)
136{
Harald Welte080774a2005-08-09 19:32:58 -0700137 struct ip_nat_protocol *proto =
138 __ip_nat_proto_find(tuple->dst.protonum);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
140 /* If we are supposed to map IPs, then we must be in the
141 range specified, otherwise let this drag us onto a new src IP. */
142 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
143 if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
144 || ntohl(tuple->src.ip) > ntohl(range->max_ip))
145 return 0;
146 }
147
148 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
149 || proto->in_range(tuple, IP_NAT_MANIP_SRC,
150 &range->min, &range->max))
151 return 1;
152
153 return 0;
154}
155
156static inline int
157same_src(const struct ip_conntrack *ct,
158 const struct ip_conntrack_tuple *tuple)
159{
160 return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
161 == tuple->dst.protonum
162 && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
163 == tuple->src.ip
164 && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
165 == tuple->src.u.all);
166}
167
/* Only called for SRC manip.
 *
 * Try to reuse an existing SNAT mapping: walk the bysource bucket for
 * conntracks with the same original source triple as @tuple, and if
 * the mapping they already use also satisfies @range, copy it into
 * @result and return 1.  Returns 0 when no suitable mapping exists. */
static int
find_appropriate_src(const struct ip_conntrack_tuple *tuple,
		     struct ip_conntrack_tuple *result,
		     const struct ip_nat_range *range)
{
	unsigned int h = hash_by_src(tuple);
	struct ip_conntrack *ct;

	read_lock_bh(&ip_nat_lock);
	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
		if (same_src(ct, tuple)) {
			/* Copy source part from reply tuple. */
			invert_tuplepr(result,
				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(result, range)) {
				/* Drop the lock before returning hit. */
				read_unlock_bh(&ip_nat_lock);
				return 1;
			}
		}
	}
	read_unlock_bh(&ip_nat_lock);
	return 0;
}
194
/* For [FUTURE] fragmentation handling, we want the least-used
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
   1-65535, we don't do pro-rata allocation based on ports; we choose
   the ip with the lowest src-ip/dst-ip/proto usage.

   Picks the IP for the manipulated side of @tuple from @range's
   [min_ip, max_ip] window; @conntrack is currently unused here. */
static void
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
		    const struct ip_nat_range *range,
		    const struct ip_conntrack *conntrack,
		    enum ip_nat_manip_type maniptype)
{
	u_int32_t *var_ipp;
	/* Host order */
	u_int32_t minip, maxip, j;

	/* No IP mapping?  Do nothing. */
	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
		return;

	/* SRC manip rewrites the source IP, everything else the dest. */
	if (maniptype == IP_NAT_MANIP_SRC)
		var_ipp = &tuple->src.ip;
	else
		var_ipp = &tuple->dst.ip;

	/* Fast path: only one choice. */
	if (range->min_ip == range->max_ip) {
		*var_ipp = range->min_ip;
		return;
	}

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots. */
	minip = ntohl(range->min_ip);
	maxip = ntohl(range->max_ip);
	j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
	*var_ipp = htonl(minip + j % (maxip - minip + 1));
}
237
/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
 * we change the source to map into the range.  For NF_IP_PRE_ROUTING
 * and NF_IP_LOCAL_OUT, we change the destination to map into the
 * range.  It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct ip_conntrack_tuple *tuple,
		 const struct ip_conntrack_tuple *orig_tuple,
		 const struct ip_nat_range *range,
		 struct ip_conntrack *conntrack,
		 enum ip_nat_manip_type maniptype)
{
	struct ip_nat_protocol *proto;

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	   and that same mapping gives a unique tuple within the given
	   range, use that.

	   This is only required for source (ie. NAT/masq) mappings.
	   So far, we don't do local source mappings, so multiple
	   manips not an issue.  */
	if (maniptype == IP_NAT_MANIP_SRC) {
		if (find_appropriate_src(orig_tuple, tuple, range)) {
			DEBUGP("get_unique_tuple: Found current src map\n");
			if (!ip_nat_used_tuple(tuple, conntrack))
				return;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given
	   range. */
	*tuple = *orig_tuple;
	find_best_ips_proto(tuple, range, conntrack, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	   the range to make a unique tuple. */

	proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);

	/* Only bother mapping if it's not already in range and unique */
	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
	    && !ip_nat_used_tuple(tuple, conntrack)) {
		ip_nat_proto_put(proto);
		return;
	}

	/* Last chance: get protocol to try to obtain unique tuple. */
	proto->unique_tuple(tuple, range, maniptype, conntrack);

	ip_nat_proto_put(proto);
}
291
/* Set up NAT for @conntrack per @range at hook @hooknum: pick a unique
 * mapped tuple, alter the conntrack's reply tuple to match, hash the
 * conntrack by original source on first setup, and mark the manip
 * done.  Always returns NF_ACCEPT. */
unsigned int
ip_nat_setup_info(struct ip_conntrack *conntrack,
		  const struct ip_nat_range *range,
		  unsigned int hooknum)
{
	struct ip_conntrack_tuple curr_tuple, new_tuple;
	struct ip_nat_info *info = &conntrack->nat.info;
	/* No DONE bit set yet means this is the first manip: hash later. */
	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
		     || hooknum == NF_IP_POST_ROUTING
		     || hooknum == NF_IP_LOCAL_IN
		     || hooknum == NF_IP_LOCAL_OUT);
	BUG_ON(ip_nat_initialized(conntrack, maniptype));

	/* What we've got will look like inverse of reply. Normally
	   this is what is in the conntrack, except for prior
	   manipulations (future optimization: if num_manips == 0,
	   orig_tp =
	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
	invert_tuplepr(&curr_tuple,
		       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);

	if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct ip_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		invert_tuplepr(&reply, &new_tuple);
		ip_conntrack_alter_reply(conntrack, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == IP_NAT_MANIP_SRC)
			conntrack->status |= IPS_SRC_NAT;
		else
			conntrack->status |= IPS_DST_NAT;
	}

	/* Place in source hash if this is the first time. */
	if (have_to_hash) {
		unsigned int srchash
			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
				      .tuple);
		write_lock_bh(&ip_nat_lock);
		list_add(&info->bysource, &bysource[srchash]);
		write_unlock_bh(&ip_nat_lock);
	}

	/* It's done. */
	if (maniptype == IP_NAT_MANIP_DST)
		set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
	else
		set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);

	return NF_ACCEPT;
}
350
/* Returns true if succeeded.
 *
 * Rewrite the IP header at @iphdroff (and its protocol payload) in
 * *@pskb to match @target on the @maniptype side, fixing the IP
 * checksum incrementally.  proto 0 reaches the "unknown" handler,
 * which leaves the per-proto part alone. */
static int
manip_pkt(u_int16_t proto,
	  struct sk_buff **pskb,
	  unsigned int iphdroff,
	  const struct ip_conntrack_tuple *target,
	  enum ip_nat_manip_type maniptype)
{
	struct iphdr *iph;
	struct ip_nat_protocol *p;

	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
		return 0;

	iph = (void *)(*pskb)->data + iphdroff;

	/* Manipulate protcol part. */
	p = ip_nat_proto_find_get(proto);
	if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
		ip_nat_proto_put(p);
		return 0;
	}
	ip_nat_proto_put(p);

	/* manip_pkt may have reallocated the skb: recompute iph. */
	iph = (void *)(*pskb)->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
						iph->check);
		iph->saddr = target->src.ip;
	} else {
		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
						iph->check);
		iph->daddr = target->dst.ip;
	}
	return 1;
}
388
/* Do packet manipulations according to ip_nat_setup_info.  Returns
 * NF_ACCEPT, or NF_DROP if the header rewrite failed. */
unsigned int nat_packet(struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned int hooknum,
			struct sk_buff **pskb)
{
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct ip_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
420
/* Dir is direction ICMP is coming from (opposite to packet it contains).
 *
 * Translate an ICMP error packet: verify its checksum, reverse-NAT the
 * embedded (inner) packet, recompute the ICMP checksum, then NAT the
 * outer header.  Returns 1 on success, 0 to drop. */
int icmp_reply_translation(struct sk_buff **pskb,
			   struct ip_conntrack *ct,
			   enum ip_nat_manip_type manip,
			   enum ip_conntrack_dir dir)
{
	struct {
		struct icmphdr icmp;
		struct iphdr ip;
	} *inside;
	struct ip_conntrack_tuple inner, target;
	int hdrlen = (*pskb)->nh.iph->ihl * 4;

	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
		return 0;

	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;

	/* We're actually going to mangle it beyond trivial checksum
	   adjustment, so make sure the current checksum is correct. */
	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
		hdrlen = (*pskb)->nh.iph->ihl * 4;
		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
						(*pskb)->len - hdrlen, 0)))
			return 0;
	}

	/* Must be RELATED */
	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);

	/* Redirects on non-null nats must be dropped, else they'll
           start talking to each other without our translation, and be
           confused... --RR */
	if (inside->icmp.type == ICMP_REDIRECT) {
		/* If NAT isn't finished, assume it and drop. */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;

		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");

	if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
			     sizeof(struct icmphdr) + inside->ip.ihl*4,
			     &inner,
			     __ip_conntrack_proto_find(inside->ip.protocol)))
		return 0;

	/* Change inner back to look like incoming packet.  We do the
	   opposite manip on this hook to normal, because it might not
	   pass all hooks (locally-generated ICMP).  Consider incoming
	   packet: PREROUTING (DST manip), routing produces ICMP, goes
	   through POSTROUTING (which must correct the DST manip). */
	if (!manip_pkt(inside->ip.protocol, pskb,
		       (*pskb)->nh.iph->ihl*4
		       + sizeof(inside->icmp),
		       &ct->tuplehash[!dir].tuple,
		       !manip))
		return 0;

	/* Reloading "inside" here since manip_pkt inner. */
	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
	inside->icmp.checksum = 0;
	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
						       (*pskb)->len - hdrlen,
						       0));

	/* Change outer to look the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */

	/* Obviously, we need to NAT destination IP, but source IP
	   should be NAT'ed only if it is from a NAT'd host.

	   Explanation: some people use NAT for anonymizing.  Also,
	   CERT recommends dropping all packets from private IP
	   addresses (although ICMP errors from internal links with
	   such addresses are not too uncommon, as Alan Cox points
	   out) */
	if (manip != IP_NAT_MANIP_SRC
	    || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
		if (!manip_pkt(0, pskb, 0, &target, manip))
			return 0;
	}

	return 1;
}
512
513/* Protocol registration. */
514int ip_nat_protocol_register(struct ip_nat_protocol *proto)
515{
516 int ret = 0;
517
Patrick McHardye45b1be2005-06-21 14:01:30 -0700518 write_lock_bh(&ip_nat_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
520 ret = -EBUSY;
521 goto out;
522 }
523 ip_nat_protos[proto->protonum] = proto;
524 out:
Patrick McHardye45b1be2005-06-21 14:01:30 -0700525 write_unlock_bh(&ip_nat_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 return ret;
527}
528
/* Noone stores the protocol anywhere; simply delete it.  Resets the
 * slot back to the "unknown" fallback handler. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
	write_lock_bh(&ip_nat_lock);
	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
	write_unlock_bh(&ip_nat_lock);

	/* Someone could be still looking at the proto in a bh. */
	synchronize_net();
}
539
Harald Welte080774a2005-08-09 19:32:58 -0700540#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
541 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Dump @range's min/max TCP ports as netlink attributes into @skb.
 * Returns 0 on success, -1 if the skb ran out of room (NFA_PUT jumps
 * to the nfattr_failure label on failure). */
int
ip_nat_port_range_to_nfattr(struct sk_buff *skb,
			    const struct ip_nat_range *range)
{
	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
		&range->min.tcp.port);
	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
		&range->max.tcp.port);

	return 0;

nfattr_failure:
	return -1;
}
556
557int
558ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
559{
560 int ret = 0;
561
562 /* we have to return whether we actually parsed something or not */
563
564 if (tb[CTA_PROTONAT_PORT_MIN-1]) {
565 ret = 1;
566 range->min.tcp.port =
567 *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
568 }
569
570 if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
571 if (ret)
572 range->max.tcp.port = range->min.tcp.port;
573 } else {
574 ret = 1;
575 range->max.tcp.port =
576 *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
577 }
578
579 return ret;
580}
581#endif
582
/* Module init: allocate the bysource hash, install the built-in
 * protocol handlers, and hook conntrack destruction so NAT state is
 * unhashed when a conntrack dies. */
int __init ip_nat_init(void)
{
	size_t i;

	/* Leave them the same for the moment. */
	ip_nat_htable_size = ip_conntrack_htable_size;

	/* Single hash table; vmalloc since the size scales with the
	 * conntrack table. */
	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
	if (!bysource)
		return -ENOMEM;

	/* Sew in builtin protocols. */
	write_lock_bh(&ip_nat_lock);
	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
		ip_nat_protos[i] = &ip_nat_unknown_protocol;
	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
	write_unlock_bh(&ip_nat_lock);

	for (i = 0; i < ip_nat_htable_size; i++) {
		INIT_LIST_HEAD(&bysource[i]);
	}

	/* FIXME: Man, this is a hack. <SIGH> */
	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;

	/* Initialize fake conntrack so that NAT will skip it */
	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
	return 0;
}
616
617/* Clear NAT section of all conntracks, in case we're loaded again. */
618static int clean_nat(struct ip_conntrack *i, void *data)
619{
620 memset(&i->nat, 0, sizeof(i->nat));
621 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
622 return 0;
623}
624
/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR
 *
 * Strips NAT state from every live conntrack, detaches the destroy
 * hook installed by ip_nat_init, and frees the bysource hash. */
void ip_nat_cleanup(void)
{
	ip_ct_iterate_cleanup(&clean_nat, NULL);
	ip_conntrack_destroyed = NULL;
	vfree(bysource);
}