/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <linux/mm.h>
#include <linux/nsproxy.h>
#include <linux/rculist_nulls.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
				      enum nf_nat_manip_type manip,
				      const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);

__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
EXPORT_SYMBOL_GPL(nf_conntrack_locks);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);

struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);

struct conntrack_gc_work {
	struct delayed_work	dwork;
	u32			last_bucket;
	bool			exiting;
	long			next_gc_run;
};

static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;

/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV	128u
/* upper bound of full table scan */
#define GC_MAX_SCAN_JIFFIES	(16u * HZ)
/* desired ratio of entries found to be expired */
#define GC_EVICT_RATIO	50u
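
/* Worked example of the bounds above (illustrative only; the real table
 * size is set at boot and may differ): with a 65536 bucket table, each gc
 * cycle scans at most 65536 / GC_MAX_BUCKETS_DIV = 512 buckets, so a full
 * sweep needs 128 cycles.  With the per-cycle delay capped at
 * GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV jiffies, a complete sweep of an
 * otherwise idle table still finishes within roughly GC_MAX_SCAN_JIFFIES.
 */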

static struct conntrack_gc_work conntrack_gc_work;

void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
	/* 1) Acquire the lock */
	spin_lock(lock);

	/* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
	 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
	 */
	if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
		return;

	/* fast path failed, unlock */
	spin_unlock(lock);

	/* Slow path 1) get global lock */
	spin_lock(&nf_conntrack_locks_all_lock);

	/* Slow path 2) get the lock we want */
	spin_lock(lock);

	/* Slow path 3) release the global lock */
	spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;
	spin_unlock(&nf_conntrack_locks[h1]);
	if (h1 != h2)
		spin_unlock(&nf_conntrack_locks[h2]);
}

/* return true if we need to recompute hashes (in case hash table was resized) */
static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
				     unsigned int h2, unsigned int sequence)
{
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;
	if (h1 <= h2) {
		nf_conntrack_lock(&nf_conntrack_locks[h1]);
		if (h1 != h2)
			spin_lock_nested(&nf_conntrack_locks[h2],
					 SINGLE_DEPTH_NESTING);
	} else {
		nf_conntrack_lock(&nf_conntrack_locks[h2]);
		spin_lock_nested(&nf_conntrack_locks[h1],
				 SINGLE_DEPTH_NESTING);
	}
	if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
		nf_conntrack_double_unlock(h1, h2);
		return true;
	}
	return false;
}

static void nf_conntrack_all_lock(void)
{
	int i;

	spin_lock(&nf_conntrack_locks_all_lock);

	nf_conntrack_locks_all = true;

	for (i = 0; i < CONNTRACK_LOCKS; i++) {
		spin_lock(&nf_conntrack_locks[i]);

		/* This spin_unlock provides the "release" to ensure that
		 * nf_conntrack_locks_all==true is visible to everyone that
		 * acquired spin_lock(&nf_conntrack_locks[]).
		 */
		spin_unlock(&nf_conntrack_locks[i]);
	}
}

static void nf_conntrack_all_unlock(void)
{
	/* All prior stores must be complete before we clear
	 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
	 * might observe the false value but not the entire
	 * critical section.
	 * It pairs with the smp_load_acquire() in nf_conntrack_lock()
	 */
	smp_store_release(&nf_conntrack_locks_all, false);
	spin_unlock(&nf_conntrack_locks_all_lock);
}
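
/* Summary of the locking scheme implemented above (descriptive only):
 * ordinary users take a single per-bucket lock via
 * nf_conntrack_lock(&nf_conntrack_locks[i]) and normally stay on the fast
 * path.  Whole-table operations bracket their work with
 *
 *	nf_conntrack_all_lock();
 *	...operate on every bucket...
 *	nf_conntrack_all_unlock();
 *
 * nf_conntrack_all_lock() sets nf_conntrack_locks_all and then takes and
 * drops every per-bucket lock once, which both waits out current holders
 * and publishes the flag; until nf_conntrack_all_unlock() clears it again,
 * nf_conntrack_lock() falls back to the slow path that serializes on
 * nf_conntrack_locks_all_lock.
 */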

unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

unsigned int nf_conntrack_max __read_mostly;
seqcount_t nf_conntrack_generation __read_mostly;

DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);

static unsigned int nf_conntrack_hash_rnd __read_mostly;

static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
			      const struct net *net)
{
	unsigned int n;
	u32 seed;

	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	return jhash2((u32 *)tuple, n, seed ^
		      (((__force __u16)tuple->dst.u.all << 16) |
		      tuple->dst.protonum));
}

static u32 scale_hash(u32 hash)
{
	return reciprocal_scale(hash, nf_conntrack_htable_size);
}

static u32 __hash_conntrack(const struct net *net,
			    const struct nf_conntrack_tuple *tuple,
			    unsigned int size)
{
	return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
}

static u32 hash_conntrack(const struct net *net,
			  const struct nf_conntrack_tuple *tuple)
{
	return scale_hash(hash_conntrack_raw(tuple, net));
}
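
/* The three helpers above split hashing into a table-size independent part
 * and a scaling part: hash_conntrack_raw() yields a 32 bit value seeded per
 * boot and per netns, while scale_hash()/__hash_conntrack() map that value
 * onto the current (or an explicitly supplied) number of buckets.  Keeping
 * the raw value separate lets __nf_conntrack_confirm() reuse the hash saved
 * at allocation time and simply rescale it, even if the table was resized
 * in between.
 */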

bool
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct net *net,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_l4proto *l4proto)
{
	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return false;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);

bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num,
		       struct net *net, struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int protoff;
	u_int8_t protonum;
	int ret;

	rcu_read_lock();

	l3proto = __nf_ct_l3proto_find(l3num);
	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
	if (ret != NF_ACCEPT) {
		rcu_read_unlock();
		return false;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
			      l3proto, l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);

bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return false;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return l4proto->invert_tuple(inverse, orig);
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);

/* Generate an almost-unique pseudo-id for a given conntrack.
 *
 * This intentionally doesn't re-use any of the seeds used for hash
 * table location; we assume the id gets exposed to userspace.
 *
 * The following nf_conn items do not change throughout the lifetime
 * of the nf_conn:
 *
 * 1. nf_conn address
 * 2. nf_conn->master address (normally NULL)
 * 3. the associated net namespace
 * 4. the original direction tuple
 */
u32 nf_ct_get_id(const struct nf_conn *ct)
{
	static __read_mostly siphash_key_t ct_id_seed;
	unsigned long a, b, c, d;

	net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));

	a = (unsigned long)ct;
	b = (unsigned long)ct->master;
	c = (unsigned long)nf_ct_net(ct);
	d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				   sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
				   &ct_id_seed);
#ifdef CONFIG_64BIT
	return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
#else
	return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
#endif
}
EXPORT_SYMBOL_GPL(nf_ct_get_id);
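
/* Example use (illustrative): ctnetlink can report this value as the
 * conntrack id instead of a raw kernel pointer.  Because the inputs are
 * mixed through siphash with a boot-time random key, userspace cannot
 * recover the nf_conn address or the hash seeds from the id.
 */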

static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}

/* must be called with local_bh_disable */
static void nf_ct_add_to_dying_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) dying list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->dying);
	spin_unlock(&pcpu->lock);
}

/* must be called with local_bh_disable */
static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) unconfirmed list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->unconfirmed);
	spin_unlock(&pcpu->lock);
}

/* must be called with local_bh_disable */
static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* We overload the first tuple to link into the unconfirmed or dying list. */
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&pcpu->lock);
}

/* Released via destroy_conntrack() */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
				 const struct nf_conntrack_zone *zone,
				 gfp_t flags)
{
	struct nf_conn *tmpl;

	tmpl = kzalloc(sizeof(*tmpl), flags);
	if (tmpl == NULL)
		return NULL;

	tmpl->status = IPS_TEMPLATE;
	write_pnet(&tmpl->ct_net, net);
	nf_ct_zone_add(tmpl, zone);
	atomic_set(&tmpl->ct_general.use, 0);

	return tmpl;
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);

void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
	nf_ct_ext_destroy(tmpl);
	nf_ct_ext_free(tmpl);
	kfree(tmpl);
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);

	if (unlikely(nf_ct_is_template(ct))) {
		nf_ct_tmpl_free(ct);
		return;
	}
	rcu_read_lock();
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->destroy)
		l4proto->destroy(ct);

	rcu_read_unlock();

	local_bh_disable();
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too.
	 */
	nf_ct_remove_expectations(ct);

	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	local_bh_enable();

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}

static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	unsigned int sequence;

	nf_ct_helper_destroy(ct);

	local_bh_disable();
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	clean_from_lists(ct);
	nf_conntrack_double_unlock(hash, reply_hash);

	nf_ct_add_to_dying_list(ct);

	local_bh_enable();
}

bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
	struct nf_conn_tstamp *tstamp;

	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
		return false;

	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp && tstamp->stop == 0)
		tstamp->stop = ktime_get_real_ns();

	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
				      portid, report) < 0) {
		/* destroy event was not delivered. nf_ct_put will
		 * be done by event cache worker on redelivery.
		 */
		nf_ct_delete_from_lists(ct);
		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
		return false;
	}

	nf_conntrack_ecache_work(nf_ct_net(ct));
	nf_ct_delete_from_lists(ct);
	nf_ct_put(ct);
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);

static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
		const struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_zone *zone,
		const struct net *net)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

	/* A conntrack can be recreated with an equal tuple,
	 * so we need to check that the conntrack is confirmed
	 */
	return nf_ct_tuple_equal(tuple, &h->tuple) &&
	       nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
	       nf_ct_is_confirmed(ct) &&
	       net_eq(net, nf_ct_net(ct));
}

/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
	if (!atomic_inc_not_zero(&ct->ct_general.use))
		return;

	if (nf_ct_should_gc(ct))
		nf_ct_kill(ct);

	nf_ct_put(ct);
}

/*
 * Warning:
 * - Caller must take a reference on the returned object
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 */
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	struct hlist_nulls_node *n;
	unsigned int bucket, hsize;

begin:
	nf_conntrack_get_ht(&ct_hash, &hsize);
	bucket = reciprocal_scale(hash, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
		struct nf_conn *ct;

		ct = nf_ct_tuplehash_to_ctrack(h);
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_is_dying(ct))
			continue;

		if (nf_ct_key_equal(h, tuple, zone, net))
			return h;
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(n) != bucket) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	return NULL;
}

/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
			const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
begin:
	h = ____nf_conntrack_find(net, zone, tuple, hash);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(nf_ct_is_dying(ct) ||
			     !atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
		else {
			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
				nf_ct_put(ct);
				goto begin;
			}
		}
	}
	rcu_read_unlock();

	return h;
}

struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple)
{
	return __nf_conntrack_find_get(net, zone, tuple,
				       hash_conntrack_raw(tuple, net));
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
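
/* Typical caller pattern for the lookup API above (a minimal sketch;
 * packet context and error handling are omitted):
 *
 *	struct nf_conntrack_tuple_hash *h;
 *
 *	h = nf_conntrack_find_get(net, zone, &tuple);
 *	if (h) {
 *		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 *		...
 *		nf_ct_put(ct);		(drop the lookup's reference)
 *	}
 *
 * The entry comes back reference-counted and already re-validated against
 * the key, so callers only need to drop the reference with nf_ct_put().
 */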

static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int reply_hash)
{
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
				 &nf_conntrack_hash[hash]);
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
				 &nf_conntrack_hash[reply_hash]);
}

int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int sequence;

	zone = nf_ct_zone(ct);

	local_bh_disable();
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	smp_wmb();
	/* The caller holds a reference to this object */
	atomic_set(&ct->ct_general.use, 2);
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert);
	local_bh_enable();
	return 0;

out:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);

static inline void nf_ct_acct_update(struct nf_conn *ct,
				     enum ip_conntrack_info ctinfo,
				     unsigned int len)
{
	struct nf_conn_acct *acct;

	acct = nf_conn_acct_find(ct);
	if (acct) {
		struct nf_conn_counter *counter = acct->counter;

		atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
		atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
	}
}

static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
			     const struct nf_conn *loser_ct)
{
	struct nf_conn_acct *acct;

	acct = nf_conn_acct_find(loser_ct);
	if (acct) {
		struct nf_conn_counter *counter = acct->counter;
		unsigned int bytes;

		/* u32 should be fine since we must have seen one packet. */
		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
		nf_ct_acct_update(ct, ctinfo, bytes);
	}
}

/* Resolve race on insertion if this protocol allows this. */
static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
			       enum ip_conntrack_info ctinfo,
			       struct nf_conntrack_tuple_hash *h)
{
	/* This is the conntrack entry already in hashes that won race. */
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
	struct nf_conntrack_l4proto *l4proto;

	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->allow_clash &&
	    ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
	    !nf_ct_is_dying(ct) &&
	    atomic_inc_not_zero(&ct->ct_general.use)) {
		nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
		nf_conntrack_put(skb->nfct);
		/* Assign conntrack already in hashes to this skbuff. Don't
		 * modify skb->nfctinfo to ensure consistent stateful filtering.
		 */
		skb->nfct = &ct->ct_general;
		return NF_ACCEPT;
	}
	NF_CT_STAT_INC(net, drop);
	return NF_DROP;
}
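
/* Note: only the entry that won the race can be adopted here, and only if
 * its l4 tracker sets ->allow_clash (connectionless protocols such as UDP,
 * where both racing entries describe the same flow) and it carries no NAT
 * bindings (IPS_NAT_DONE_MASK clear).  In every other case the packet that
 * lost the insertion race is dropped, as before.
 */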

/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	const struct nf_conntrack_zone *zone;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conn_tstamp *tstamp;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	unsigned int sequence;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	local_bh_disable();

	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		/* reuse the hash saved before */
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
					    &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
	 * connections for unconfirmed conns. But packet copies and
	 * REJECT will give spurious warnings here.
	 */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	 * confirmed us.
	 */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);
	/* We have to check the DYING flag after unlink to prevent
	 * a race against nf_ct_get_next_corpse() possibly called from
	 * user context, else we insert an already 'dead' hash, blocking
	 * further use of that particular connection -JM.
	 */
	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	if (unlikely(nf_ct_is_dying(ct))) {
		nf_ct_add_to_dying_list(ct);
		goto dying;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout += nfct_time_stamp;
	atomic_inc(&ct->ct_general.use);
	ct->status |= IPS_CONFIRMED;

	/* set conntrack timestamp, if enabled. */
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp) {
		if (skb->tstamp.tv64 == 0)
			__net_timestamp(skb);

		tstamp->start = ktime_to_ns(skb->tstamp);
	}
	/* Since the lookup is lockless, hash insertion must be done after
	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
	 * guarantee that no other CPU can find the conntrack before the above
	 * stores are visible.
	 */
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();

	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	nf_ct_add_to_dying_list(ct);
	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
dying:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	unsigned int hash, hsize;
	struct hlist_nulls_node *n;
	struct nf_conn *ct;

	zone = nf_ct_zone(ignored_conntrack);

	rcu_read_lock();
 begin:
	nf_conntrack_get_ht(&ct_hash, &hsize);
	hash = __hash_conntrack(net, tuple, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);

		if (ct == ignored_conntrack)
			continue;

		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_key_equal(h, tuple, zone, net)) {
			/* Tuple is taken already, so caller will need to find
			 * a new source port to use.
			 *
			 * Only exception:
			 * If the *original tuples* are identical, then both
			 * conntracks refer to the same flow.
			 * This is a rare situation, it can occur e.g. when
			 * more than one UDP packet is sent from same socket
			 * in different threads.
			 *
			 * Let nf_ct_resolve_clash() deal with this later.
			 */
			if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
					      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
				continue;

			NF_CT_STAT_INC_ATOMIC(net, found);
			rcu_read_unlock();
			return 1;
		}
	}

	if (get_nulls_value(n) != hash) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
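
/* Usage note: the main caller of nf_conntrack_tuple_taken() is the NAT
 * core (via nf_nat_used_tuple()) while it probes candidate addresses and
 * ports; a "taken" result simply makes it move on to the next candidate.
 */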

#define NF_CT_EVICTION_RANGE	8

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static unsigned int early_drop_list(struct net *net,
				    struct hlist_nulls_head *head)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int drops = 0;
	struct nf_conn *tmp;

	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
		tmp = nf_ct_tuplehash_to_ctrack(h);

		if (nf_ct_is_expired(tmp)) {
			nf_ct_gc_expired(tmp);
			continue;
		}

		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
		    !net_eq(nf_ct_net(tmp), net) ||
		    nf_ct_is_dying(tmp))
			continue;

		if (!atomic_inc_not_zero(&tmp->ct_general.use))
			continue;

		/* kill only if still in same netns -- might have moved due to
		 * SLAB_DESTROY_BY_RCU rules.
		 *
		 * We steal the timer reference. If that fails timer has
		 * already fired or someone else deleted it. Just drop ref
		 * and move to next entry.
		 */
		if (net_eq(nf_ct_net(tmp), net) &&
		    nf_ct_is_confirmed(tmp) &&
		    nf_ct_delete(tmp, 0, 0))
			drops++;

		nf_ct_put(tmp);
	}

	return drops;
}

static noinline int early_drop(struct net *net, unsigned int hash)
{
	unsigned int i, bucket;

	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
		struct hlist_nulls_head *ct_hash;
		unsigned int hsize, drops;

		rcu_read_lock();
		nf_conntrack_get_ht(&ct_hash, &hsize);
		if (!i)
			bucket = reciprocal_scale(hash, hsize);
		else
			bucket = (bucket + 1) % hsize;

		drops = early_drop_list(net, &ct_hash[bucket]);
		rcu_read_unlock();

		if (drops) {
			NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
			return true;
		}
	}

	return false;
}

static void gc_worker(struct work_struct *work)
{
	unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
	unsigned int i, goal, buckets = 0, expired_count = 0;
	struct conntrack_gc_work *gc_work;
	unsigned int ratio, scanned = 0;
	unsigned long next_run;

	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

	goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
	i = gc_work->last_bucket;

	do {
		struct nf_conntrack_tuple_hash *h;
		struct hlist_nulls_head *ct_hash;
		struct hlist_nulls_node *n;
		unsigned int hashsz;
		struct nf_conn *tmp;

		i++;
		rcu_read_lock();

		nf_conntrack_get_ht(&ct_hash, &hashsz);
		if (i >= hashsz)
			i = 0;

		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
			tmp = nf_ct_tuplehash_to_ctrack(h);

			scanned++;
			if (nf_ct_is_expired(tmp)) {
				nf_ct_gc_expired(tmp);
				expired_count++;
				continue;
			}
		}

		/* could check get_nulls_value() here and restart if ct
		 * was moved to another chain.  But given gc is best-effort
		 * we will just continue with next hash slot.
		 */
		rcu_read_unlock();
		cond_resched_rcu_qs();
	} while (++buckets < goal);

	if (gc_work->exiting)
		return;

	/*
	 * Eviction will normally happen from the packet path, and not
	 * from this gc worker.
	 *
	 * This worker is only here to reap expired entries when the system
	 * went idle after a busy period.
	 *
	 * The heuristics below are supposed to balance conflicting goals:
	 *
	 * 1. Minimize time until we notice a stale entry
	 * 2. Maximize scan intervals to not waste cycles
	 *
	 * Normally, the expired ratio will be close to 0.
	 *
	 * As soon as a sizeable fraction of the entries have expired,
	 * increase the scan frequency.
	 */
	ratio = scanned ? expired_count * 100 / scanned : 0;
	if (ratio > GC_EVICT_RATIO) {
		gc_work->next_gc_run = min_interval;
	} else {
		unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;

		BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);

		gc_work->next_gc_run += min_interval;
		if (gc_work->next_gc_run > max)
			gc_work->next_gc_run = max;
	}

	next_run = gc_work->next_gc_run;
	gc_work->last_bucket = i;
	queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
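
/* Worked example of the interval adaptation above (illustrative, HZ=100):
 * min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u) = 1 jiffy.  While fewer
 * than GC_EVICT_RATIO percent of the scanned entries are expired,
 * next_gc_run grows by one jiffy per cycle until it reaches
 * GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV = 12 jiffies; once the expired
 * ratio exceeds GC_EVICT_RATIO, the delay snaps back to min_interval so a
 * mostly-stale table is reaped quickly.
 */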
1082
1083static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
1084{
1085 INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
Florian Westphal371d0342017-01-18 02:01:22 +01001086 gc_work->next_gc_run = HZ;
Florian Westphalb87a2f92016-08-25 15:33:33 +02001087 gc_work->exiting = false;
1088}
1089
Changli Gao99f07e92010-09-21 17:49:20 +02001090static struct nf_conn *
Daniel Borkmann308ac912015-08-08 21:40:01 +02001091__nf_conntrack_alloc(struct net *net,
1092 const struct nf_conntrack_zone *zone,
Changli Gao99f07e92010-09-21 17:49:20 +02001093 const struct nf_conntrack_tuple *orig,
1094 const struct nf_conntrack_tuple *repl,
1095 gfp_t gfp, u32 hash)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001096{
Julia Lawallcd7fcbf2009-01-12 00:06:08 +00001097 struct nf_conn *ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001098
Pablo Neira Ayuso5251e2d2006-09-20 12:01:06 -07001099	/* We don't want any race condition at the early drop stage */
Alexey Dobriyan49ac8712008-10-08 11:35:03 +02001100 atomic_inc(&net->ct.count);
Pablo Neira Ayuso5251e2d2006-09-20 12:01:06 -07001101
Patrick McHardy76eb9462008-01-31 04:41:44 -08001102 if (nf_conntrack_max &&
Alexey Dobriyan49ac8712008-10-08 11:35:03 +02001103 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001104 if (!early_drop(net, hash)) {
Alexey Dobriyan49ac8712008-10-08 11:35:03 +02001105 atomic_dec(&net->ct.count);
Joe Perchese87cc472012-05-13 21:56:26 +00001106 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001107 return ERR_PTR(-ENOMEM);
1108 }
1109 }
1110
Eric Dumazet941297f2009-07-16 14:03:40 +02001111 /*
1112 * Do not use kmem_cache_zalloc(), as this cache uses
1113 * SLAB_DESTROY_BY_RCU.
1114 */
Florian Westphal0c5366b2016-05-09 16:24:32 +02001115 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001116 if (ct == NULL)
1117 goto out;
1118
Patrick McHardy440f0d52009-06-10 14:32:47 +02001119 spin_lock_init(&ct->lock);
Patrick McHardyc88130b2008-01-31 04:42:11 -08001120 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
Eric Dumazet941297f2009-07-16 14:03:40 +02001121 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
Patrick McHardyc88130b2008-01-31 04:42:11 -08001122 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
Changli Gao99f07e92010-09-21 17:49:20 +02001123	/* save the hash for reuse when confirming */
1124 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
Florian Westphalc41884c2014-11-24 15:25:57 +01001125 ct->status = 0;
Eric Dumazetc2d9ba92010-06-01 06:51:19 +00001126 write_pnet(&ct->ct_net, net);
Florian Westphalc41884c2014-11-24 15:25:57 +01001127 memset(&ct->__nfct_init_offset[0], 0,
1128 offsetof(struct nf_conn, proto) -
1129 offsetof(struct nf_conn, __nfct_init_offset[0]));
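	/* Note: the memset above only clears the fields between
	 * __nfct_init_offset and ->proto; everything initialised explicitly
	 * above lives before that marker, and ->proto itself is left for the
	 * L4 tracker's ->new() handler to fill in.
	 */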
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001130
Florian Westphal6c8dee92016-06-11 21:57:35 +02001131 nf_ct_zone_add(ct, zone);
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001132
Pablo Neira Ayusoe53376b2014-02-03 20:01:53 +01001133 /* Because we use RCU lookups, we set ct_general.use to zero before
1134 * this is inserted in any list.
Eric Dumazet941297f2009-07-16 14:03:40 +02001135 */
Pablo Neira Ayusoe53376b2014-02-03 20:01:53 +01001136 atomic_set(&ct->ct_general.use, 0);
Patrick McHardyc88130b2008-01-31 04:42:11 -08001137 return ct;
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001138out:
1139 atomic_dec(&net->ct.count);
Patrick McHardy5d0aa2c2010-02-15 18:13:33 +01001140 return ERR_PTR(-ENOMEM);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001141}
Changli Gao99f07e92010-09-21 17:49:20 +02001142
Daniel Borkmann308ac912015-08-08 21:40:01 +02001143struct nf_conn *nf_conntrack_alloc(struct net *net,
1144 const struct nf_conntrack_zone *zone,
Changli Gao99f07e92010-09-21 17:49:20 +02001145 const struct nf_conntrack_tuple *orig,
1146 const struct nf_conntrack_tuple *repl,
1147 gfp_t gfp)
1148{
1149 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
1150}
Patrick McHardy13b18332006-12-02 22:11:25 -08001151EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001152
Patrick McHardyc88130b2008-01-31 04:42:11 -08001153void nf_conntrack_free(struct nf_conn *ct)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001154{
Eric Dumazet1d452092009-03-24 14:26:50 +01001155 struct net *net = nf_ct_net(ct);
1156
Pablo Neira Ayusoe53376b2014-02-03 20:01:53 +01001157 /* A freed object has refcnt == 0, that's
1158 * the golden rule for SLAB_DESTROY_BY_RCU
1159 */
1160 NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
1161
Patrick McHardyceeff752008-06-11 17:51:10 -07001162 nf_ct_ext_destroy(ct);
Eric Dumazetea781f12009-03-25 21:05:46 +01001163 nf_ct_ext_free(ct);
Florian Westphal0c5366b2016-05-09 16:24:32 +02001164 kmem_cache_free(nf_conntrack_cachep, ct);
Peter Zijlstra4e857c52014-03-17 18:06:10 +01001165 smp_mb__before_atomic();
Pablo Neira Ayuso0c3c6c02013-11-18 12:53:59 +01001166 atomic_dec(&net->ct.count);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001167}
Patrick McHardy13b18332006-12-02 22:11:25 -08001168EXPORT_SYMBOL_GPL(nf_conntrack_free);
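/* Because the conntrack cache uses SLAB_DESTROY_BY_RCU, the memory freed here
 * may be recycled for a brand-new entry before an RCU grace period elapses;
 * that is why lookups only take references via atomic_inc_not_zero() and
 * re-check the tuple once the reference is held.
 */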
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001169
Florian Westphalc539f012013-01-11 06:30:44 +00001170
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001171/* Allocate a new conntrack: we return -ENOMEM if classification
1172 failed due to stress. Otherwise it really is unclassifiable. */
1173static struct nf_conntrack_tuple_hash *
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001174init_conntrack(struct net *net, struct nf_conn *tmpl,
Alexey Dobriyan5a1fb392008-10-08 11:35:02 +02001175 const struct nf_conntrack_tuple *tuple,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001176 struct nf_conntrack_l3proto *l3proto,
Martin Josefsson605dcad2006-11-29 02:35:06 +01001177 struct nf_conntrack_l4proto *l4proto,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001178 struct sk_buff *skb,
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001179 unsigned int dataoff, u32 hash)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001180{
Patrick McHardyc88130b2008-01-31 04:42:11 -08001181 struct nf_conn *ct;
Patrick McHarrdy3c158f72007-06-05 12:55:27 -07001182 struct nf_conn_help *help;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001183 struct nf_conntrack_tuple repl_tuple;
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001184 struct nf_conntrack_ecache *ecache;
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001185 struct nf_conntrack_expect *exp = NULL;
Daniel Borkmann308ac912015-08-08 21:40:01 +02001186 const struct nf_conntrack_zone *zone;
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001187 struct nf_conn_timeout *timeout_ext;
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001188 struct nf_conntrack_zone tmp;
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001189 unsigned int *timeouts;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001190
Martin Josefsson605dcad2006-11-29 02:35:06 +01001191 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
Patrick McHardy0d537782007-07-07 22:39:38 -07001192 pr_debug("Can't invert tuple.\n");
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001193 return NULL;
1194 }
1195
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001196 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
Changli Gao99f07e92010-09-21 17:49:20 +02001197 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
1198 hash);
Joe Perches0a9ee812011-08-29 14:17:25 -07001199 if (IS_ERR(ct))
Patrick McHardyc88130b2008-01-31 04:42:11 -08001200 return (struct nf_conntrack_tuple_hash *)ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001201
Gao Feng4440a2a2016-09-13 08:49:18 +08001202 if (!nf_ct_add_synproxy(ct, tmpl)) {
1203 nf_conntrack_free(ct);
1204 return ERR_PTR(-ENOMEM);
Patrick McHardy48b1de42013-08-27 08:50:14 +02001205 }
1206
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001207 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
Pablo Neira Ayusoae2d7082015-10-05 16:51:01 +02001208 if (timeout_ext) {
1209 timeouts = nf_ct_timeout_data(timeout_ext);
1210 if (unlikely(!timeouts))
1211 timeouts = l4proto->get_timeouts(net);
1212 } else {
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001213 timeouts = l4proto->get_timeouts(net);
Pablo Neira Ayusoae2d7082015-10-05 16:51:01 +02001214 }
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001215
Pablo Neira Ayuso2c8503f2012-02-28 18:23:31 +01001216 if (!l4proto->new(ct, skb, dataoff, timeouts)) {
Patrick McHardyc88130b2008-01-31 04:42:11 -08001217 nf_conntrack_free(ct);
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001218 pr_debug("can't track with proto module\n");
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001219 return NULL;
1220 }
1221
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001222 if (timeout_ext)
Pablo Neira Ayusoae2d7082015-10-05 16:51:01 +02001223 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
1224 GFP_ATOMIC);
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001225
Krzysztof Piotr Oledzki58401572008-07-21 10:01:34 -07001226 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
Pablo Neira Ayusoa992ca22011-01-19 16:00:07 +01001227 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
Florian Westphalc539f012013-01-11 06:30:44 +00001228 nf_ct_labels_ext_add(ct);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001229
1230 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
1231 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
1232 ecache ? ecache->expmask : 0,
1233 GFP_ATOMIC);
Krzysztof Piotr Oledzki58401572008-07-21 10:01:34 -07001234
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001235 local_bh_disable();
1236 if (net->ct.expect_count) {
1237 spin_lock(&nf_conntrack_expect_lock);
1238 exp = nf_ct_find_expectation(net, zone, tuple);
1239 if (exp) {
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001240 pr_debug("expectation arrives ct=%p exp=%p\n",
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001241 ct, exp);
1242 /* Welcome, Mr. Bond. We've been expecting you... */
1243 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1244 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
1245 ct->master = exp->master;
1246 if (exp->helper) {
1247 help = nf_ct_helper_ext_add(ct, exp->helper,
1248 GFP_ATOMIC);
1249 if (help)
1250 rcu_assign_pointer(help->helper, exp->helper);
1251 }
Yasuyuki Kozakaiceceae12007-07-07 22:23:42 -07001252
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001253#ifdef CONFIG_NF_CONNTRACK_MARK
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001254 ct->mark = exp->master->mark;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001255#endif
James Morris7c9728c2006-06-09 00:31:46 -07001256#ifdef CONFIG_NF_CONNTRACK_SECMARK
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001257 ct->secmark = exp->master->secmark;
James Morris7c9728c2006-06-09 00:31:46 -07001258#endif
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001259 NF_CT_STAT_INC(net, expect_new);
1260 }
1261 spin_unlock(&nf_conntrack_expect_lock);
1262 }
Florian Westphal8e8118f2016-09-11 22:55:53 +02001263 if (!exp)
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001264 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001265
Pablo Neira Ayusoe53376b2014-02-03 20:01:53 +01001266	/* It will now be inserted into the unconfirmed list, so bump the refcount */
1267 nf_conntrack_get(&ct->ct_general);
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01001268 nf_ct_add_to_unconfirmed_list(ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001269
Jesper Dangaard Brouerca7433d2014-03-03 14:46:01 +01001270 local_bh_enable();
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001271
1272 if (exp) {
1273 if (exp->expectfn)
Patrick McHardyc88130b2008-01-31 04:42:11 -08001274 exp->expectfn(ct, exp);
Patrick McHardy68236452007-07-07 22:30:49 -07001275 nf_ct_expect_put(exp);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001276 }
1277
Patrick McHardyc88130b2008-01-31 04:42:11 -08001278 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001279}
1280
1281/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1282static inline struct nf_conn *
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001283resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
Alexey Dobriyana702a652008-10-08 11:35:04 +02001284 struct sk_buff *skb,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001285 unsigned int dataoff,
1286 u_int16_t l3num,
1287 u_int8_t protonum,
1288 struct nf_conntrack_l3proto *l3proto,
Martin Josefsson605dcad2006-11-29 02:35:06 +01001289 struct nf_conntrack_l4proto *l4proto,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001290 int *set_reply,
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001291 enum ip_conntrack_info *ctinfo)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001292{
Daniel Borkmann308ac912015-08-08 21:40:01 +02001293 const struct nf_conntrack_zone *zone;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001294 struct nf_conntrack_tuple tuple;
1295 struct nf_conntrack_tuple_hash *h;
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001296 struct nf_conntrack_zone tmp;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001297 struct nf_conn *ct;
Changli Gao99f07e92010-09-21 17:49:20 +02001298 u32 hash;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001299
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001300 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
Eric W. Biedermana31f1ad2015-09-18 14:33:04 -05001301 dataoff, l3num, protonum, net, &tuple, l3proto,
Martin Josefsson605dcad2006-11-29 02:35:06 +01001302 l4proto)) {
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001303 pr_debug("Can't get tuple\n");
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001304 return NULL;
1305 }
1306
1307 /* look for tuple match */
Daniel Borkmann5e8018f2015-08-14 16:03:40 +02001308 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
Florian Westphal1b8c8a92016-05-03 00:25:58 +02001309 hash = hash_conntrack_raw(&tuple, net);
Changli Gao99f07e92010-09-21 17:49:20 +02001310 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001311 if (!h) {
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001312 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001313 skb, dataoff, hash);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001314 if (!h)
1315 return NULL;
1316 if (IS_ERR(h))
1317 return (void *)h;
1318 }
1319 ct = nf_ct_tuplehash_to_ctrack(h);
1320
1321	/* It exists; we have a (non-exclusive) reference. */
1322 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
Eric Dumazetfb048832011-05-19 15:44:27 +02001323 *ctinfo = IP_CT_ESTABLISHED_REPLY;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001324		/* Please set the reply bit if this packet is OK */
1325 *set_reply = 1;
1326 } else {
1327 /* Once we've had two way comms, always ESTABLISHED. */
1328 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001329 pr_debug("normal packet for %p\n", ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001330 *ctinfo = IP_CT_ESTABLISHED;
1331 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001332 pr_debug("related packet for %p\n", ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001333 *ctinfo = IP_CT_RELATED;
1334 } else {
Weongyo Jeongccd63c22016-03-15 10:57:44 -07001335 pr_debug("new packet for %p\n", ct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001336 *ctinfo = IP_CT_NEW;
1337 }
1338 *set_reply = 0;
1339 }
1340 skb->nfct = &ct->ct_general;
1341 skb->nfctinfo = *ctinfo;
1342 return ct;
1343}
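/* Summary of the ctinfo values assigned above:
 *   reply direction                          -> IP_CT_ESTABLISHED_REPLY
 *   original direction, IPS_SEEN_REPLY set   -> IP_CT_ESTABLISHED
 *   original direction, IPS_EXPECTED set     -> IP_CT_RELATED
 *   original direction, anything else        -> IP_CT_NEW
 */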
1344
1345unsigned int
Alexey Dobriyana702a652008-10-08 11:35:04 +02001346nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1347 struct sk_buff *skb)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001348{
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001349 struct nf_conn *ct, *tmpl = NULL;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001350 enum ip_conntrack_info ctinfo;
1351 struct nf_conntrack_l3proto *l3proto;
Martin Josefsson605dcad2006-11-29 02:35:06 +01001352 struct nf_conntrack_l4proto *l4proto;
Pablo Neira Ayuso2c8503f2012-02-28 18:23:31 +01001353 unsigned int *timeouts;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001354 unsigned int dataoff;
1355 u_int8_t protonum;
1356 int set_reply = 0;
1357 int ret;
1358
Herbert Xu3db05fe2007-10-15 00:53:15 -07001359 if (skb->nfct) {
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001360 /* Previously seen (loopback or untracked)? Ignore. */
1361 tmpl = (struct nf_conn *)skb->nfct;
1362 if (!nf_ct_is_template(tmpl)) {
1363 NF_CT_STAT_INC_ATOMIC(net, ignore);
1364 return NF_ACCEPT;
1365 }
1366 skb->nfct = NULL;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001367 }
1368
Aaron Conolee2361cb2016-09-21 11:35:04 -04001369 /* rcu_read_lock()ed by nf_hook_thresh */
Jan Engelhardt76108ce2008-10-08 11:35:00 +02001370 l3proto = __nf_ct_l3proto_find(pf);
Herbert Xu3db05fe2007-10-15 00:53:15 -07001371 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
Yasuyuki Kozakaiffc30692007-07-14 20:44:50 -07001372 &dataoff, &protonum);
1373 if (ret <= 0) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001374 pr_debug("not prepared to track yet or error occurred\n");
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02001375 NF_CT_STAT_INC_ATOMIC(net, error);
1376 NF_CT_STAT_INC_ATOMIC(net, invalid);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001377 ret = -ret;
1378 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001379 }
1380
Jan Engelhardt76108ce2008-10-08 11:35:00 +02001381 l4proto = __nf_ct_l4proto_find(pf, protonum);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001382
1383 /* It may be an special packet, error, unclean...
1384 * inverse of the return code tells to the netfilter
1385 * core what to do with the packet. */
Alexey Dobriyan74c51a12008-10-08 11:35:05 +02001386 if (l4proto->error != NULL) {
Patrick McHardy8fea97e2010-02-15 17:45:08 +01001387 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
1388 pf, hooknum);
Alexey Dobriyan74c51a12008-10-08 11:35:05 +02001389 if (ret <= 0) {
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02001390 NF_CT_STAT_INC_ATOMIC(net, error);
1391 NF_CT_STAT_INC_ATOMIC(net, invalid);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001392 ret = -ret;
1393 goto out;
Alexey Dobriyan74c51a12008-10-08 11:35:05 +02001394 }
Pablo Neira Ayuso88ed01d2011-06-02 15:08:45 +02001395 /* ICMP[v6] protocol trackers may assign one conntrack. */
1396 if (skb->nfct)
1397 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001398 }
1399
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001400 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001401 l3proto, l4proto, &set_reply, &ctinfo);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001402 if (!ct) {
1403 /* Not valid part of a connection */
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02001404 NF_CT_STAT_INC_ATOMIC(net, invalid);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001405 ret = NF_ACCEPT;
1406 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001407 }
1408
1409 if (IS_ERR(ct)) {
1410 /* Too stressed to deal. */
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02001411 NF_CT_STAT_INC_ATOMIC(net, drop);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001412 ret = NF_DROP;
1413 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001414 }
1415
Herbert Xu3db05fe2007-10-15 00:53:15 -07001416 NF_CT_ASSERT(skb->nfct);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001417
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001418 /* Decide what timeout policy we want to apply to this flow. */
Pablo Neira Ayuso84b5ee92012-08-28 00:53:15 +00001419 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
Pablo Neira Ayuso60b5f8f2012-03-23 00:04:53 +01001420
Pablo Neira Ayuso2c8503f2012-02-28 18:23:31 +01001421 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
Christoph Paaschec8d5402009-03-16 15:51:29 +01001422 if (ret <= 0) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001423		/* Invalid: the inverse of the return code tells
1424 * the netfilter core what to do */
Patrick McHardy0d537782007-07-07 22:39:38 -07001425 pr_debug("nf_conntrack_in: Can't track with proto module\n");
Herbert Xu3db05fe2007-10-15 00:53:15 -07001426 nf_conntrack_put(skb->nfct);
1427 skb->nfct = NULL;
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02001428 NF_CT_STAT_INC_ATOMIC(net, invalid);
Pablo Neira Ayuso7d1e0452009-02-24 14:48:01 +01001429 if (ret == -NF_DROP)
1430 NF_CT_STAT_INC_ATOMIC(net, drop);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001431 ret = -ret;
1432 goto out;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001433 }
1434
1435 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
Patrick McHardy858b31332010-02-03 13:48:53 +01001436 nf_conntrack_event_cache(IPCT_REPLY, ct);
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001437out:
Pablo Neira Ayusoc3174282011-02-09 08:08:20 +01001438 if (tmpl) {
1439 /* Special case: we have to repeat this hook, assign the
1440 * template again to this packet. We assume that this packet
1441 * has no conntrack assigned. This is used by nf_ct_tcp. */
1442 if (ret == NF_REPEAT)
1443 skb->nfct = (struct nf_conntrack *)tmpl;
1444 else
1445 nf_ct_put(tmpl);
1446 }
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001447
1448 return ret;
1449}
Patrick McHardy13b18332006-12-02 22:11:25 -08001450EXPORT_SYMBOL_GPL(nf_conntrack_in);
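/* Rough contract of nf_conntrack_in(): NF_ACCEPT when the packet was tracked
 * or deliberately ignored, NF_DROP when the table is full or the L4 tracker
 * rejects the packet, and NF_REPEAT when the hook has to be re-run with the
 * template conntrack re-attached (used by nf_ct_tcp).
 */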
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001451
Jan Engelhardt5f2b4c92008-04-14 11:15:53 +02001452bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1453 const struct nf_conntrack_tuple *orig)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001454{
Jan Engelhardt5f2b4c92008-04-14 11:15:53 +02001455 bool ret;
Patrick McHardy923f4902007-02-12 11:12:57 -08001456
1457 rcu_read_lock();
1458 ret = nf_ct_invert_tuple(inverse, orig,
1459 __nf_ct_l3proto_find(orig->src.l3num),
1460 __nf_ct_l4proto_find(orig->src.l3num,
1461 orig->dst.protonum));
1462 rcu_read_unlock();
1463 return ret;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001464}
Patrick McHardy13b18332006-12-02 22:11:25 -08001465EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001466
Jozsef Kadlecsik5b1158e2006-12-02 22:07:13 -08001467/* Alter reply tuple (maybe alter helper). This is for NAT, and is
1468 implicitly racy: see __nf_conntrack_confirm */
1469void nf_conntrack_alter_reply(struct nf_conn *ct,
1470 const struct nf_conntrack_tuple *newreply)
1471{
1472 struct nf_conn_help *help = nfct_help(ct);
1473
Jozsef Kadlecsik5b1158e2006-12-02 22:07:13 -08001474 /* Should be unconfirmed, so not in hash table yet */
1475 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
1476
Patrick McHardy0d537782007-07-07 22:39:38 -07001477 pr_debug("Altering reply tuple of %p to ", ct);
Jan Engelhardt3c9fba62008-04-14 11:15:54 +02001478 nf_ct_dump_tuple(newreply);
Jozsef Kadlecsik5b1158e2006-12-02 22:07:13 -08001479
1480 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
Patrick McHardyef1a5a52008-04-14 11:21:01 +02001481 if (ct->master || (help && !hlist_empty(&help->expectations)))
Patrick McHardyc52fbb42008-01-31 04:37:36 -08001482 return;
Yasuyuki Kozakaiceceae12007-07-07 22:23:42 -07001483
Patrick McHardyc52fbb42008-01-31 04:37:36 -08001484 rcu_read_lock();
Patrick McHardyb2a15a62010-02-03 14:13:03 +01001485 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
Patrick McHardyc52fbb42008-01-31 04:37:36 -08001486 rcu_read_unlock();
Jozsef Kadlecsik5b1158e2006-12-02 22:07:13 -08001487}
Patrick McHardy13b18332006-12-02 22:11:25 -08001488EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
Jozsef Kadlecsik5b1158e2006-12-02 22:07:13 -08001489
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001490/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1491void __nf_ct_refresh_acct(struct nf_conn *ct,
1492 enum ip_conntrack_info ctinfo,
1493 const struct sk_buff *skb,
1494 unsigned long extra_jiffies,
1495 int do_acct)
1496{
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001497 NF_CT_ASSERT(skb);
1498
Eric Leblond997ae832006-05-29 18:24:20 -07001499 /* Only update if this is not a fixed timeout */
Patrick McHardy47d95042008-01-31 04:36:31 -08001500 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1501 goto acct;
Eric Leblond997ae832006-05-29 18:24:20 -07001502
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001503 /* If not in hash table, timer will not be active yet */
Florian Westphalf330a7f2016-08-25 15:33:31 +02001504 if (nf_ct_is_confirmed(ct))
1505 extra_jiffies += nfct_time_stamp;
Martin Josefssonbe00c8e2006-11-29 02:35:12 +01001506
Florian Westphalf330a7f2016-08-25 15:33:31 +02001507 ct->timeout = extra_jiffies;
Patrick McHardy47d95042008-01-31 04:36:31 -08001508acct:
Pablo Neira Ayusoba767382016-05-02 21:28:57 +02001509 if (do_acct)
1510 nf_ct_acct_update(ct, ctinfo, skb->len);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001511}
Patrick McHardy13b18332006-12-02 22:11:25 -08001512EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
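/* Note: ct->timeout ends up holding an absolute expiry time (nfct_time_stamp
 * plus the requested jiffies) once the entry is confirmed; for entries that
 * are still unconfirmed the value stays relative and is only made absolute
 * when the entry is confirmed.
 */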
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001513
Florian Westphalad667132016-08-25 15:33:35 +02001514bool nf_ct_kill_acct(struct nf_conn *ct,
1515 enum ip_conntrack_info ctinfo,
1516 const struct sk_buff *skb)
Patrick McHardy51091762008-06-09 15:59:06 -07001517{
Florian Westphalad667132016-08-25 15:33:35 +02001518 nf_ct_acct_update(ct, ctinfo, skb->len);
Krzysztof Piotr Oledzki58401572008-07-21 10:01:34 -07001519
Florian Westphalf330a7f2016-08-25 15:33:31 +02001520 return nf_ct_delete(ct, 0, 0);
Patrick McHardy51091762008-06-09 15:59:06 -07001521}
Florian Westphalad667132016-08-25 15:33:35 +02001522EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
Patrick McHardy51091762008-06-09 15:59:06 -07001523
Igor Maravićc0cd1152011-12-12 02:58:24 +00001524#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001525
1526#include <linux/netfilter/nfnetlink.h>
1527#include <linux/netfilter/nfnetlink_conntrack.h>
Ingo Molnar57b47a52006-03-20 22:35:41 -08001528#include <linux/mutex.h>
1529
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001530/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
1531 * in nf_conntrack_core, since we don't want the protocols to autoload
1532 * or depend on ctnetlink */
Patrick McHardyfdf70832007-09-28 14:37:41 -07001533int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001534 const struct nf_conntrack_tuple *tuple)
1535{
David S. Millerbae65be2012-04-01 18:58:28 -04001536 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1537 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1538 goto nla_put_failure;
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001539 return 0;
1540
Patrick McHardydf6fb862007-09-28 14:37:03 -07001541nla_put_failure:
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001542 return -1;
1543}
Patrick McHardyfdf70832007-09-28 14:37:41 -07001544EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001545
Patrick McHardyf73e9242007-09-28 14:39:55 -07001546const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1547 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
1548 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001549};
Patrick McHardyf73e9242007-09-28 14:39:55 -07001550EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001551
Patrick McHardyfdf70832007-09-28 14:37:41 -07001552int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001553 struct nf_conntrack_tuple *t)
1554{
Patrick McHardydf6fb862007-09-28 14:37:03 -07001555 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001556 return -EINVAL;
1557
Patrick McHardy77236b62007-12-17 22:29:45 -08001558 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1559 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001560
1561 return 0;
1562}
Patrick McHardyfdf70832007-09-28 14:37:41 -07001563EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
Holger Eitzenberger5c0de292009-03-25 21:52:17 +01001564
1565int nf_ct_port_nlattr_tuple_size(void)
1566{
1567 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1568}
1569EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
Pablo Neira Ayusoc1d10ad2006-01-05 12:19:05 -08001570#endif
1571
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001572/* Used by ipt_REJECT and ip6t_REJECT. */
Patrick McHardy312a0c162013-07-28 22:54:08 +02001573static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001574{
1575 struct nf_conn *ct;
1576 enum ip_conntrack_info ctinfo;
1577
1578	/* This ICMP is in the reverse direction to the packet which caused it */
1579 ct = nf_ct_get(skb, &ctinfo);
1580 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
Eric Dumazetfb048832011-05-19 15:44:27 +02001581 ctinfo = IP_CT_RELATED_REPLY;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001582 else
1583 ctinfo = IP_CT_RELATED;
1584
1585 /* Attach to new skbuff, and increment count */
1586 nskb->nfct = &ct->ct_general;
1587 nskb->nfctinfo = ctinfo;
1588 nf_conntrack_get(nskb->nfct);
1589}
1590
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001591/* Bring out ya dead! */
Patrick McHardydf0933d2006-09-20 11:57:53 -07001592static struct nf_conn *
Alexey Dobriyan400dad32008-10-08 11:35:03 +02001593get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001594 void *data, unsigned int *bucket)
1595{
Patrick McHardydf0933d2006-09-20 11:57:53 -07001596 struct nf_conntrack_tuple_hash *h;
1597 struct nf_conn *ct;
Eric Dumazetea781f12009-03-25 21:05:46 +01001598 struct hlist_nulls_node *n;
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001599 spinlock_t *lockp;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001600
Florian Westphal56d52d42016-05-02 18:39:55 +02001601 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001602 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1603 local_bh_disable();
Sasha Levinb16c2912016-01-18 19:23:51 -05001604 nf_conntrack_lock(lockp);
Florian Westphal56d52d42016-05-02 18:39:55 +02001605 if (*bucket < nf_conntrack_htable_size) {
1606 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001607 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1608 continue;
1609 ct = nf_ct_tuplehash_to_ctrack(h);
Florian Westphale0c7d472016-04-28 19:13:45 +02001610 if (net_eq(nf_ct_net(ct), net) &&
1611 iter(ct, data))
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001612 goto found;
1613 }
Patrick McHardydf0933d2006-09-20 11:57:53 -07001614 }
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001615 spin_unlock(lockp);
1616 local_bh_enable();
Florian Westphald93c6252016-01-20 11:16:43 +01001617 cond_resched();
YOSHIFUJI Hideaki601e68e2007-02-12 11:15:49 -08001618 }
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01001619
Patrick McHardydf0933d2006-09-20 11:57:53 -07001620 return NULL;
1621found:
Martin Josefssonc073e3f2006-10-30 15:13:58 -08001622 atomic_inc(&ct->ct_general.use);
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001623 spin_unlock(lockp);
1624 local_bh_enable();
Patrick McHardydf0933d2006-09-20 11:57:53 -07001625 return ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001626}
1627
Florian Westphalf51c4542017-05-21 12:52:56 +02001628static void
1629__nf_ct_unconfirmed_destroy(struct net *net)
1630{
1631 int cpu;
1632
1633 for_each_possible_cpu(cpu) {
1634 struct nf_conntrack_tuple_hash *h;
1635 struct hlist_nulls_node *n;
1636 struct ct_pcpu *pcpu;
1637
1638 pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1639
1640 spin_lock_bh(&pcpu->lock);
1641 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
1642 struct nf_conn *ct;
1643
1644 ct = nf_ct_tuplehash_to_ctrack(h);
1645
1646 /* we cannot call iter() on unconfirmed list, the
1647 * owning cpu can reallocate ct->ext at any time.
1648 */
1649 set_bit(IPS_DYING_BIT, &ct->status);
1650 }
1651 spin_unlock_bh(&pcpu->lock);
1652 cond_resched();
1653 }
1654}
1655
Alexey Dobriyan400dad32008-10-08 11:35:03 +02001656void nf_ct_iterate_cleanup(struct net *net,
1657 int (*iter)(struct nf_conn *i, void *data),
Florian Westphalc655bc62013-07-29 15:41:55 +02001658 void *data, u32 portid, int report)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001659{
Patrick McHardydf0933d2006-09-20 11:57:53 -07001660 struct nf_conn *ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001661 unsigned int bucket = 0;
1662
Florian Westphald93c6252016-01-20 11:16:43 +01001663 might_sleep();
1664
Florian Westphal88b68bc2016-04-28 19:13:42 +02001665 if (atomic_read(&net->ct.count) == 0)
1666 return;
1667
Florian Westphalf51c4542017-05-21 12:52:56 +02001668 __nf_ct_unconfirmed_destroy(net);
1669
1670 synchronize_net();
1671
Alexey Dobriyan400dad32008-10-08 11:35:03 +02001672 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001673		/* Time to push up daisies... */
Florian Westphal02982c22013-07-29 15:41:54 +02001674
Florian Westphalf330a7f2016-08-25 15:33:31 +02001675 nf_ct_delete(ct, portid, report);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001676 nf_ct_put(ct);
Florian Westphald93c6252016-01-20 11:16:43 +01001677 cond_resched();
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001678 }
1679}
Patrick McHardy13b18332006-12-02 22:11:25 -08001680EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
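/* Rough cleanup order used above: the per-cpu unconfirmed entries are first
 * marked IPS_DYING, then synchronize_net() lets in-flight packets observe
 * that bit, and only afterwards is the central hash swept bucket by bucket.
 */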
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001681
Pablo Neira Ayuso274d3832009-06-02 20:08:38 +02001682static int kill_all(struct nf_conn *i, void *data)
1683{
1684 return 1;
1685}
1686
Patrick McHardyd862a662011-01-14 15:45:56 +01001687void nf_ct_free_hashtable(void *hash, unsigned int size)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001688{
Patrick McHardyd862a662011-01-14 15:45:56 +01001689 if (is_vmalloc_addr(hash))
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001690 vfree(hash);
1691 else
YOSHIFUJI Hideaki601e68e2007-02-12 11:15:49 -08001692 free_pages((unsigned long)hash,
Patrick McHardyf205c5e2007-07-07 22:28:14 -07001693 get_order(sizeof(struct hlist_head) * size));
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001694}
Patrick McHardyac565e52007-07-07 22:30:08 -07001695EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001696
Eric Dumazetb3c51632010-06-09 14:43:38 +02001697static int untrack_refs(void)
1698{
1699 int cnt = 0, cpu;
1700
1701 for_each_possible_cpu(cpu) {
1702 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1703
1704 cnt += atomic_read(&ct->ct_general.use) - 1;
1705 }
1706 return cnt;
1707}
1708
Gao fengf94161c2013-01-21 22:10:24 +00001709void nf_conntrack_cleanup_start(void)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001710{
Florian Westphalb87a2f92016-08-25 15:33:33 +02001711 conntrack_gc_work.exiting = true;
Gao fengf94161c2013-01-21 22:10:24 +00001712 RCU_INIT_POINTER(ip_ct_attach, NULL);
1713}
1714
1715void nf_conntrack_cleanup_end(void)
1716{
1717 RCU_INIT_POINTER(nf_ct_destroy, NULL);
Eric Dumazetb3c51632010-06-09 14:43:38 +02001718 while (untrack_refs() > 0)
Patrick McHardy9edd7ca2010-02-08 11:16:26 -08001719 schedule();
1720
Florian Westphalb87a2f92016-08-25 15:33:33 +02001721 cancel_delayed_work_sync(&conntrack_gc_work.dwork);
Florian Westphal56d52d42016-05-02 18:39:55 +02001722 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1723
Gao feng04d87002013-01-21 22:10:32 +00001724 nf_conntrack_proto_fini();
Patrick McHardy41d73ec2013-08-27 08:50:12 +02001725 nf_conntrack_seqadj_fini();
Gao feng5f69b8f2013-01-21 22:10:31 +00001726 nf_conntrack_labels_fini();
Gao feng5e615b22013-01-21 22:10:30 +00001727 nf_conntrack_helper_fini();
Gao feng86840942013-01-21 22:10:29 +00001728 nf_conntrack_timeout_fini();
Gao feng3fe0f942013-01-21 22:10:28 +00001729 nf_conntrack_ecache_fini();
Gao feng73f40012013-01-21 22:10:27 +00001730 nf_conntrack_tstamp_fini();
Gao fengb7ff3a12013-01-21 22:10:26 +00001731 nf_conntrack_acct_fini();
Gao feng83b4dbe2013-01-21 22:10:25 +00001732 nf_conntrack_expect_fini();
Florian Westphal77571142016-06-10 17:25:19 +02001733
1734 kmem_cache_destroy(nf_conntrack_cachep);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02001735}
Yasuyuki Kozakai7d3cdc62006-02-15 15:22:21 -08001736
Gao fengf94161c2013-01-21 22:10:24 +00001737/*
1738 * Mishearing the voices in his head, our hero wonders how he's
1739 * supposed to kill the mall.
1740 */
1741void nf_conntrack_cleanup_net(struct net *net)
Alexey Dobriyan08f65472008-10-08 11:35:09 +02001742{
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001743 LIST_HEAD(single);
1744
1745 list_add(&net->exit_list, &single);
1746 nf_conntrack_cleanup_net_list(&single);
1747}
1748
1749void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1750{
1751 int busy;
1752 struct net *net;
1753
Gao fengf94161c2013-01-21 22:10:24 +00001754 /*
1755 * This makes sure all current packets have passed through
1756	 * the netfilter framework. Roll on, two-stage module
1757 * delete...
1758 */
1759 synchronize_net();
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001760i_see_dead_people:
1761 busy = 0;
1762 list_for_each_entry(net, net_exit_list, exit_list) {
Florian Westphalc655bc62013-07-29 15:41:55 +02001763 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001764 if (atomic_read(&net->ct.count) != 0)
1765 busy = 1;
1766 }
1767 if (busy) {
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001768 schedule();
1769 goto i_see_dead_people;
1770 }
1771
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001772 list_for_each_entry(net, net_exit_list, exit_list) {
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001773 nf_conntrack_proto_pernet_fini(net);
1774 nf_conntrack_helper_pernet_fini(net);
1775 nf_conntrack_ecache_pernet_fini(net);
1776 nf_conntrack_tstamp_pernet_fini(net);
1777 nf_conntrack_acct_pernet_fini(net);
1778 nf_conntrack_expect_pernet_fini(net);
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001779 free_percpu(net->ct.stat);
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01001780 free_percpu(net->ct.pcpu_lists);
Vladimir Davydovdece40e2013-03-13 23:40:14 +00001781 }
Alexey Dobriyan08f65472008-10-08 11:35:09 +02001782}
1783
Patrick McHardyd862a662011-01-14 15:45:56 +01001784void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001785{
Eric Dumazetea781f12009-03-25 21:05:46 +01001786 struct hlist_nulls_head *hash;
1787 unsigned int nr_slots, i;
1788 size_t sz;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001789
Florian Westphal9cc1c732016-04-24 01:18:21 +02001790 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
1791 return NULL;
1792
Eric Dumazetea781f12009-03-25 21:05:46 +01001793 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1794 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
Florian Westphal9cc1c732016-04-24 01:18:21 +02001795
1796 if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
1797 return NULL;
1798
Eric Dumazetea781f12009-03-25 21:05:46 +01001799 sz = nr_slots * sizeof(struct hlist_nulls_head);
1800 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1801 get_order(sz));
Pablo Neira Ayusof0ad4622015-07-23 13:06:10 +02001802 if (!hash)
Eric Dumazet966567b2011-12-19 16:01:38 -05001803 hash = vzalloc(sz);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001804
Eric Dumazetea781f12009-03-25 21:05:46 +01001805 if (hash && nulls)
1806 for (i = 0; i < nr_slots; i++)
1807 INIT_HLIST_NULLS_HEAD(&hash[i], i);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001808
1809 return hash;
1810}
Patrick McHardyac565e52007-07-07 22:30:08 -07001811EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
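/* When 'nulls' is requested, each chain is terminated with a nulls marker
 * equal to its bucket index; lockless walkers compare that marker with the
 * bucket they started in and restart the lookup if the entry they followed
 * was moved to a different chain.
 */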
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001812
Florian Westphal3183ab82016-06-22 13:26:10 +02001813int nf_conntrack_hash_resize(unsigned int hashsize)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001814{
Florian Westphal3183ab82016-06-22 13:26:10 +02001815 int i, bucket;
1816 unsigned int old_size;
Eric Dumazetea781f12009-03-25 21:05:46 +01001817 struct hlist_nulls_head *hash, *old_hash;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001818 struct nf_conntrack_tuple_hash *h;
Patrick McHardy5d0aa2c2010-02-15 18:13:33 +01001819 struct nf_conn *ct;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001820
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001821 if (!hashsize)
1822 return -EINVAL;
1823
Patrick McHardyd862a662011-01-14 15:45:56 +01001824 hash = nf_ct_alloc_hashtable(&hashsize, 1);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001825 if (!hash)
1826 return -ENOMEM;
1827
Florian Westphal3183ab82016-06-22 13:26:10 +02001828 old_size = nf_conntrack_htable_size;
1829 if (old_size == hashsize) {
1830 nf_ct_free_hashtable(hash, hashsize);
1831 return 0;
1832 }
1833
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001834 local_bh_disable();
1835 nf_conntrack_all_lock();
Florian Westphala3efd812016-04-18 16:16:59 +02001836 write_seqcount_begin(&nf_conntrack_generation);
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001837
Patrick McHardy76507f62008-01-31 04:38:38 -08001838 /* Lookups in the old hash might happen in parallel, which means we
1839 * might get false negatives during connection lookup. New connections
1840 * created because of a false negative won't make it into the hash
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001841	 * though since that requires taking the locks.
Patrick McHardy76507f62008-01-31 04:38:38 -08001842 */
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001843
Florian Westphal56d52d42016-05-02 18:39:55 +02001844 for (i = 0; i < nf_conntrack_htable_size; i++) {
1845 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
1846 h = hlist_nulls_entry(nf_conntrack_hash[i].first,
1847 struct nf_conntrack_tuple_hash, hnnode);
Patrick McHardy5d0aa2c2010-02-15 18:13:33 +01001848 ct = nf_ct_tuplehash_to_ctrack(h);
Eric Dumazetea781f12009-03-25 21:05:46 +01001849 hlist_nulls_del_rcu(&h->hnnode);
Florian Westphal1b8c8a92016-05-03 00:25:58 +02001850 bucket = __hash_conntrack(nf_ct_net(ct),
1851 &h->tuple, hashsize);
Eric Dumazetea781f12009-03-25 21:05:46 +01001852 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001853 }
1854 }
Florian Westphal56d52d42016-05-02 18:39:55 +02001855 old_size = nf_conntrack_htable_size;
1856 old_hash = nf_conntrack_hash;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001857
Florian Westphal56d52d42016-05-02 18:39:55 +02001858 nf_conntrack_hash = hash;
1859 nf_conntrack_htable_size = hashsize;
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001860
Florian Westphala3efd812016-04-18 16:16:59 +02001861 write_seqcount_end(&nf_conntrack_generation);
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001862 nf_conntrack_all_unlock();
1863 local_bh_enable();
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001864
Florian Westphal5e3c61f2016-04-28 19:13:41 +02001865 synchronize_net();
Patrick McHardyd862a662011-01-14 15:45:56 +01001866 nf_ct_free_hashtable(old_hash, old_size);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001867 return 0;
1868}
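/* The resize above runs with every bucket lock held and bumps the generation
 * seqcount; lockless readers that raced with the rehash either retry via the
 * seqcount or notice a foreign nulls value at the end of a chain and restart
 * their lookup against the new table.
 */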
Florian Westphal3183ab82016-06-22 13:26:10 +02001869
1870int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1871{
1872 unsigned int hashsize;
1873 int rc;
1874
1875 if (current->nsproxy->net_ns != &init_net)
1876 return -EOPNOTSUPP;
1877
1878 /* On boot, we can set this without any fancy locking. */
Andrey Ryabinind8a77d12018-07-06 16:38:53 +03001879 if (!nf_conntrack_hash)
Florian Westphal3183ab82016-06-22 13:26:10 +02001880 return param_set_uint(val, kp);
1881
1882 rc = kstrtouint(val, 0, &hashsize);
1883 if (rc)
1884 return rc;
1885
1886 return nf_conntrack_hash_resize(hashsize);
1887}
Patrick McHardyfae718d2007-12-24 21:09:10 -08001888EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001889
Patrick McHardyfae718d2007-12-24 21:09:10 -08001890module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001891 &nf_conntrack_htable_size, 0600);
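/* Illustrative runtime usage (init netns only; the path follows the standard
 * module-parameter layout):
 *   echo 65536 > /sys/module/nf_conntrack/parameters/hashsize
 */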
1892
Eric Dumazet5bfddbd2010-06-08 16:09:52 +02001893void nf_ct_untracked_status_or(unsigned long bits)
1894{
Eric Dumazetb3c51632010-06-09 14:43:38 +02001895 int cpu;
1896
1897 for_each_possible_cpu(cpu)
1898 per_cpu(nf_conntrack_untracked, cpu).status |= bits;
Eric Dumazet5bfddbd2010-06-08 16:09:52 +02001899}
1900EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1901
Gao fengf94161c2013-01-21 22:10:24 +00001902int nf_conntrack_init_start(void)
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001903{
Patrick McHardyf205c5e2007-07-07 22:28:14 -07001904 int max_factor = 8;
Florian Westphal0c5366b2016-05-09 16:24:32 +02001905 int ret = -ENOMEM;
1906 int i, cpu;
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001907
Florian Westphala3efd812016-04-18 16:16:59 +02001908 seqcount_init(&nf_conntrack_generation);
1909
Eric Dumazetd5d20912014-03-17 13:37:53 -07001910 for (i = 0; i < CONNTRACK_LOCKS; i++)
Jesper Dangaard Brouer93bb0ce2014-03-03 14:46:13 +01001911 spin_lock_init(&nf_conntrack_locks[i]);
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001912
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001913 if (!nf_conntrack_htable_size) {
Marcelo Leitner88eab472014-12-03 17:30:19 -02001914 /* Idea from tcp.c: use 1/16384 of memory.
1915 * On i386: 32MB machine has 512 buckets.
1916 * >= 1GB machines have 16384 buckets.
1917 * >= 4GB machines have 65536 buckets.
1918 */
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001919 nf_conntrack_htable_size
Jan Beulich44813742009-09-21 17:03:05 -07001920 = (((totalram_pages << PAGE_SHIFT) / 16384)
Patrick McHardyf205c5e2007-07-07 22:28:14 -07001921 / sizeof(struct hlist_head));
Marcelo Leitner88eab472014-12-03 17:30:19 -02001922 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
1923 nf_conntrack_htable_size = 65536;
1924 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
Patrick McHardyf205c5e2007-07-07 22:28:14 -07001925 nf_conntrack_htable_size = 16384;
1926 if (nf_conntrack_htable_size < 32)
1927 nf_conntrack_htable_size = 32;
1928
1929 /* Use a max. factor of four by default to get the same max as
1930 * with the old struct list_heads. When a table size is given
1931 * we use the old value of 8 to avoid reducing the max.
1932 * entries. */
1933 max_factor = 4;
Yasuyuki Kozakai9fb9cbb2005-11-09 16:38:16 -08001934 }
Florian Westphal56d52d42016-05-02 18:39:55 +02001935
1936 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
1937 if (!nf_conntrack_hash)
1938 return -ENOMEM;
1939
Patrick McHardyf205c5e2007-07-07 22:28:14 -07001940 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
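	/* Worked example (illustrative): a 64-bit machine with 512 MiB of RAM
	 * and no hashsize= override gets (512 MiB / 16384) / 8 = 4096 buckets,
	 * and with max_factor = 4 at most 16384 tracked connections before
	 * early_drop starts evicting.
	 */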
Patrick McHardy8e5105a2007-07-07 22:27:33 -07001941
Florian Westphal0c5366b2016-05-09 16:24:32 +02001942 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
1943 sizeof(struct nf_conn), 0,
Florian Westphal5a75cde2016-06-10 22:25:22 +02001944 SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
Florian Westphal0c5366b2016-05-09 16:24:32 +02001945 if (!nf_conntrack_cachep)
1946 goto err_cachep;
1947
Stephen Hemminger654d0fb2010-05-13 15:02:08 +02001948 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
Patrick McHardy8e5105a2007-07-07 22:27:33 -07001949 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1950 nf_conntrack_max);
Gao feng83b4dbe2013-01-21 22:10:25 +00001951
1952 ret = nf_conntrack_expect_init();
1953 if (ret < 0)
1954 goto err_expect;
1955
Gao fengb7ff3a12013-01-21 22:10:26 +00001956 ret = nf_conntrack_acct_init();
1957 if (ret < 0)
1958 goto err_acct;
1959
Gao feng73f40012013-01-21 22:10:27 +00001960 ret = nf_conntrack_tstamp_init();
1961 if (ret < 0)
1962 goto err_tstamp;
1963
Gao feng3fe0f942013-01-21 22:10:28 +00001964 ret = nf_conntrack_ecache_init();
1965 if (ret < 0)
1966 goto err_ecache;
1967
Gao feng86840942013-01-21 22:10:29 +00001968 ret = nf_conntrack_timeout_init();
1969 if (ret < 0)
1970 goto err_timeout;
1971
Gao feng5e615b22013-01-21 22:10:30 +00001972 ret = nf_conntrack_helper_init();
1973 if (ret < 0)
1974 goto err_helper;
1975
Gao feng5f69b8f2013-01-21 22:10:31 +00001976 ret = nf_conntrack_labels_init();
1977 if (ret < 0)
1978 goto err_labels;
1979
Patrick McHardy41d73ec2013-08-27 08:50:12 +02001980 ret = nf_conntrack_seqadj_init();
1981 if (ret < 0)
1982 goto err_seqadj;
1983
Gao feng04d87002013-01-21 22:10:32 +00001984 ret = nf_conntrack_proto_init();
1985 if (ret < 0)
1986 goto err_proto;
1987
Patrick McHardy9edd7ca2010-02-08 11:16:26 -08001988 /* Set up fake conntrack: to never be deleted, not in any hashes */
Eric Dumazetb3c51632010-06-09 14:43:38 +02001989 for_each_possible_cpu(cpu) {
1990 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
Eric Dumazetb3c51632010-06-09 14:43:38 +02001991 write_pnet(&ct->ct_net, &init_net);
1992 atomic_set(&ct->ct_general.use, 1);
1993 }
Patrick McHardy9edd7ca2010-02-08 11:16:26 -08001994	/* - and make it look like a confirmed connection */
Eric Dumazet5bfddbd2010-06-08 16:09:52 +02001995 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
Florian Westphalb87a2f92016-08-25 15:33:33 +02001996
1997 conntrack_gc_work_init(&conntrack_gc_work);
Florian Westphal371d0342017-01-18 02:01:22 +01001998 queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
Florian Westphalb87a2f92016-08-25 15:33:33 +02001999
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002000 return 0;
2001
Gao feng04d87002013-01-21 22:10:32 +00002002err_proto:
Patrick McHardy41d73ec2013-08-27 08:50:12 +02002003 nf_conntrack_seqadj_fini();
2004err_seqadj:
Gao feng04d87002013-01-21 22:10:32 +00002005 nf_conntrack_labels_fini();
Gao feng5f69b8f2013-01-21 22:10:31 +00002006err_labels:
2007 nf_conntrack_helper_fini();
Gao feng5e615b22013-01-21 22:10:30 +00002008err_helper:
2009 nf_conntrack_timeout_fini();
Gao feng86840942013-01-21 22:10:29 +00002010err_timeout:
2011 nf_conntrack_ecache_fini();
Gao feng3fe0f942013-01-21 22:10:28 +00002012err_ecache:
2013 nf_conntrack_tstamp_fini();
Gao feng73f40012013-01-21 22:10:27 +00002014err_tstamp:
2015 nf_conntrack_acct_fini();
Gao fengb7ff3a12013-01-21 22:10:26 +00002016err_acct:
2017 nf_conntrack_expect_fini();
Gao feng83b4dbe2013-01-21 22:10:25 +00002018err_expect:
Florian Westphal0c5366b2016-05-09 16:24:32 +02002019 kmem_cache_destroy(nf_conntrack_cachep);
2020err_cachep:
Florian Westphal56d52d42016-05-02 18:39:55 +02002021 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002022 return ret;
2023}
2024
Gao fengf94161c2013-01-21 22:10:24 +00002025void nf_conntrack_init_end(void)
2026{
2027 /* For use by REJECT target */
2028 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
2029 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
Gao fengf94161c2013-01-21 22:10:24 +00002030}
2031
Eric Dumazet8cc20192009-06-22 14:13:55 +02002032/*
2033 * We need to use special "null" values, not used in hash table
2034 */
2035#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
2036#define DYING_NULLS_VAL ((1<<30)+1)
Pablo Neira Ayuso252b3e82012-12-11 04:07:42 +00002037#define TEMPLATE_NULLS_VAL ((1<<30)+2)
Eric Dumazet8cc20192009-06-22 14:13:55 +02002038
Gao fengf94161c2013-01-21 22:10:24 +00002039int nf_conntrack_init_net(struct net *net)
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002040{
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01002041 int ret = -ENOMEM;
2042 int cpu;
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002043
2044 atomic_set(&net->ct.count, 0);
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01002045
2046 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
2047 if (!net->ct.pcpu_lists)
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002048 goto err_stat;
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01002049
2050 for_each_possible_cpu(cpu) {
2051 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
2052
2053 spin_lock_init(&pcpu->lock);
2054 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
2055 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002056 }
Eric Dumazet5b3501f2010-02-08 11:16:56 -08002057
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01002058 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
2059 if (!net->ct.stat)
2060 goto err_pcpu_lists;
2061
Gao feng83b4dbe2013-01-21 22:10:25 +00002062 ret = nf_conntrack_expect_pernet_init(net);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002063 if (ret < 0)
2064 goto err_expect;
Gao fengb7ff3a12013-01-21 22:10:26 +00002065 ret = nf_conntrack_acct_pernet_init(net);
Krzysztof Piotr Oledzki58401572008-07-21 10:01:34 -07002066 if (ret < 0)
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002067 goto err_acct;
Gao feng73f40012013-01-21 22:10:27 +00002068 ret = nf_conntrack_tstamp_pernet_init(net);
Pablo Neira Ayusoa992ca22011-01-19 16:00:07 +01002069 if (ret < 0)
2070 goto err_tstamp;
Gao feng3fe0f942013-01-21 22:10:28 +00002071 ret = nf_conntrack_ecache_pernet_init(net);
Pablo Neira Ayusoa0891aa2009-06-13 12:26:29 +02002072 if (ret < 0)
2073 goto err_ecache;
Gao feng5e615b22013-01-21 22:10:30 +00002074 ret = nf_conntrack_helper_pernet_init(net);
Eric Leblonda9006892012-04-18 11:20:41 +02002075 if (ret < 0)
2076 goto err_helper;
Gao feng04d87002013-01-21 22:10:32 +00002077 ret = nf_conntrack_proto_pernet_init(net);
Gao fengf94161c2013-01-21 22:10:24 +00002078 if (ret < 0)
2079 goto err_proto;
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002080 return 0;
Florian Westphalc539f012013-01-11 06:30:44 +00002081
Gao fengf94161c2013-01-21 22:10:24 +00002082err_proto:
Gao feng5e615b22013-01-21 22:10:30 +00002083 nf_conntrack_helper_pernet_fini(net);
Eric Leblonda9006892012-04-18 11:20:41 +02002084err_helper:
Gao feng3fe0f942013-01-21 22:10:28 +00002085 nf_conntrack_ecache_pernet_fini(net);
Pablo Neira Ayusoa0891aa2009-06-13 12:26:29 +02002086err_ecache:
Gao feng73f40012013-01-21 22:10:27 +00002087 nf_conntrack_tstamp_pernet_fini(net);
Pablo Neira Ayusoa992ca22011-01-19 16:00:07 +01002088err_tstamp:
Gao fengb7ff3a12013-01-21 22:10:26 +00002089 nf_conntrack_acct_pernet_fini(net);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002090err_acct:
Gao feng83b4dbe2013-01-21 22:10:25 +00002091 nf_conntrack_expect_pernet_fini(net);
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002092err_expect:
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02002093 free_percpu(net->ct.stat);
Jesper Dangaard Brouerb7779d02014-03-03 14:45:20 +01002094err_pcpu_lists:
2095 free_percpu(net->ct.pcpu_lists);
Alexey Dobriyan0d55af82008-10-08 11:35:07 +02002096err_stat:
Alexey Dobriyan08f65472008-10-08 11:35:09 +02002097 return ret;
2098}