netfilter: xtables: don't hook tables by default

Delay hook registration until the table is being requested inside a namespace.

Historically, a particular table (iptables mangle, ip6tables filter, etc.) was registered on module load. When netns support was added to iptables, only the ip/ip6tables ruleset was made namespace aware, not the actual hook points.

This means, for example, that when the ipt_filter table/module is loaded on a system, each namespace on that system has an (empty) iptables filter ruleset. In other words, if a namespace sends a packet, that skb is 'caught' by the netfilter machinery and fed to the hooking points for that table (e.g. INPUT, FORWARD).

Thanks to Eric Biederman, hooks are no longer global, but per namespace. This means that we can avoid allocating an empty ruleset in a namespace and defer hook registration until we need the functionality.

We register a table's hook entry points ONLY in the initial namespace. When an iptables get/setsockopt is issued inside a given namespace, we check if the table is found in the per-namespace list. If not, we attempt to find it in the initial namespace, and, if found, create an empty default table in the requesting namespace and register the needed hooks.

Hook points are destroyed only once the namespace is deleted; there is no 'usage count' (it makes no sense since there is no 'remove table' operation in the xtables API).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

commit: b9e69e127397187b70c813a4397cce7afb5e8cb1 [log] [tgz]
author: Florian Westphal <fw@strlen.de> Thu Feb 25 10:08:36 2016 +0100
committer: Pablo Neira Ayuso <pablo@netfilter.org> Wed Mar 02 20:05:24 2016 +0100
tree: 1a5a4769ef5ba3087fa15579d15cdd3aa3c3dabd
parent: a67dd266adf42a24df31380e9da78390bb4d65ef [diff] [blame]
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 88bc52f..ff22659 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c

@@ -28,12 +28,15 @@
 				(1 << NF_INET_FORWARD) | \
 				(1 << NF_INET_LOCAL_OUT)
 
+static int __net_init iptable_security_table_init(struct net *net);
+
 static const struct xt_table security_table = {
 	.name		= "security",
 	.valid_hooks	= SECURITY_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV4,
 	.priority	= NF_IP_PRI_SECURITY,
+	.table_init	= iptable_security_table_init,
 };
 
 static unsigned int
@@ -51,11 +54,14 @@
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
-static int __net_init iptable_security_net_init(struct net *net)
+static int __net_init iptable_security_table_init(struct net *net)
 {
 	struct ipt_replace *repl;
 	int ret;
 
+	if (net->ipv4.iptable_security)
+		return 0;
+
 	repl = ipt_alloc_initial_table(&security_table);
 	if (repl == NULL)
 		return -ENOMEM;
@@ -67,11 +73,14 @@
 
 static void __net_exit iptable_security_net_exit(struct net *net)
 {
+	if (!net->ipv4.iptable_security)
+		return;
+
 	ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
+	net->ipv4.iptable_security = NULL;
 }
 
 static struct pernet_operations iptable_security_net_ops = {
-	.init = iptable_security_net_init,
 	.exit = iptable_security_net_exit,
 };
 
@@ -79,27 +88,29 @@
 {
 	int ret;
 
-	ret = register_pernet_subsys(&iptable_security_net_ops);
-	if (ret < 0)
-		return ret;
+	sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+	if (IS_ERR(sectbl_ops))
+		return PTR_ERR(sectbl_ops);
 
-	sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
-	if (IS_ERR(sectbl_ops)) {
-		ret = PTR_ERR(sectbl_ops);
-		goto cleanup_table;
+	ret = register_pernet_subsys(&iptable_security_net_ops);
+	if (ret < 0) {
+		kfree(sectbl_ops);
+		return ret;
 	}
 
-	return ret;
+	ret = iptable_security_table_init(&init_net);
+	if (ret) {
+		unregister_pernet_subsys(&iptable_security_net_ops);
+		kfree(sectbl_ops);
+	}
 
-cleanup_table:
-	unregister_pernet_subsys(&iptable_security_net_ops);
 	return ret;
 }
 
 static void __exit iptable_security_fini(void)
 {
-	xt_hook_unlink(&security_table, sectbl_ops);
 	unregister_pernet_subsys(&iptable_security_net_ops);
+	kfree(sectbl_ops);
 }
 
 module_init(iptable_security_init);
commit	b9e69e127397187b70c813a4397cce7afb5e8cb1	[log] [tgz]
author	Florian Westphal <fw@strlen.de>	Thu Feb 25 10:08:36 2016 +0100
committer	Pablo Neira Ayuso <pablo@netfilter.org>	Wed Mar 02 20:05:24 2016 +0100
tree	1a5a4769ef5ba3087fa15579d15cdd3aa3c3dabd
parent	a67dd266adf42a24df31380e9da78390bb4d65ef [diff] [blame]