Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter/nf_tables fixes

The following patchset contains nf_tables fixes, they are:

1) Fix wrong transaction handling when the table flags are not
   modified.

2) Fix missing rcu read_lock section in the netlink dump path, which
   is not protected by the nfnl_lock.

3) Set NLM_F_DUMP_INTR in the netlink dump path to indicate
   interferences with updates.

4) Fix 64 bits chain counters when they are retrieved from a 32 bits
   arch, from Eric Dumazet.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 713b0b8..c4d8619 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -6,6 +6,7 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nf_tables.h>
+#include <linux/u64_stats_sync.h>
 #include <net/netlink.h>
 
 #define NFT_JUMP_STACK_SIZE	16
@@ -528,8 +529,9 @@
 };
 
 struct nft_stats {
-	u64 bytes;
-	u64 pkts;
+	u64			bytes;
+	u64			pkts;
+	struct u64_stats_sync	syncp;
 };
 
 #define NFT_HOOK_OPS_MAX		2
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 26a394c..eee608b 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -13,8 +13,8 @@
 	struct nft_af_info	*inet;
 	struct nft_af_info	*arp;
 	struct nft_af_info	*bridge;
+	unsigned int		base_seq;
 	u8			gencursor;
-	u8			genctr;
 };
 
 #endif
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ab4566c..8746ff9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -35,7 +35,7 @@
 {
 	INIT_LIST_HEAD(&afi->tables);
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_add_tail(&afi->list, &net->nft.af_info);
+	list_add_tail_rcu(&afi->list, &net->nft.af_info);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	return 0;
 }
@@ -51,7 +51,7 @@
 void nft_unregister_afinfo(struct nft_af_info *afi)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_del(&afi->list);
+	list_del_rcu(&afi->list);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 }
 EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
@@ -277,11 +277,14 @@
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 
-	list_for_each_entry(afi, &net->nft.af_info, list) {
+	rcu_read_lock();
+	cb->seq = net->nft.base_seq;
+
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
 		if (family != NFPROTO_UNSPEC && family != afi->family)
 			continue;
 
-		list_for_each_entry(table, &afi->tables, list) {
+		list_for_each_entry_rcu(table, &afi->tables, list) {
 			if (idx < s_idx)
 				goto cont;
 			if (idx > s_idx)
@@ -294,11 +297,14 @@
 						      NLM_F_MULTI,
 						      afi->family, table) < 0)
 				goto done;
+
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 			idx++;
 		}
 	}
 done:
+	rcu_read_unlock();
 	cb->args[0] = idx;
 	return skb->len;
 }
@@ -407,6 +413,9 @@
 	if (flags & ~NFT_TABLE_F_DORMANT)
 		return -EINVAL;
 
+	if (flags == ctx->table->flags)
+		return 0;
+
 	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
 				sizeof(struct nft_trans_table));
 	if (trans == NULL)
@@ -514,7 +523,7 @@
 		module_put(afi->owner);
 		return err;
 	}
-	list_add_tail(&table->list, &afi->tables);
+	list_add_tail_rcu(&table->list, &afi->tables);
 	return 0;
 }
 
@@ -546,7 +555,7 @@
 	if (err < 0)
 		return err;
 
-	list_del(&table->list);
+	list_del_rcu(&table->list);
 	return 0;
 }
 
@@ -635,13 +644,20 @@
 {
 	struct nft_stats *cpu_stats, total;
 	struct nlattr *nest;
+	unsigned int seq;
+	u64 pkts, bytes;
 	int cpu;
 
 	memset(&total, 0, sizeof(total));
 	for_each_possible_cpu(cpu) {
 		cpu_stats = per_cpu_ptr(stats, cpu);
-		total.pkts += cpu_stats->pkts;
-		total.bytes += cpu_stats->bytes;
+		do {
+			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			pkts = cpu_stats->pkts;
+			bytes = cpu_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+		total.pkts += pkts;
+		total.bytes += bytes;
 	}
 	nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
 	if (nest == NULL)
@@ -761,12 +777,15 @@
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 
-	list_for_each_entry(afi, &net->nft.af_info, list) {
+	rcu_read_lock();
+	cb->seq = net->nft.base_seq;
+
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
 		if (family != NFPROTO_UNSPEC && family != afi->family)
 			continue;
 
-		list_for_each_entry(table, &afi->tables, list) {
-			list_for_each_entry(chain, &table->chains, list) {
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			list_for_each_entry_rcu(chain, &table->chains, list) {
 				if (idx < s_idx)
 					goto cont;
 				if (idx > s_idx)
@@ -778,17 +797,19 @@
 							      NLM_F_MULTI,
 							      afi->family, table, chain) < 0)
 					goto done;
+
+				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 				idx++;
 			}
 		}
 	}
 done:
+	rcu_read_unlock();
 	cb->args[0] = idx;
 	return skb->len;
 }
 
-
 static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -861,7 +882,7 @@
 	if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
 		return ERR_PTR(-EINVAL);
 
-	newstats = alloc_percpu(struct nft_stats);
+	newstats = netdev_alloc_pcpu_stats(struct nft_stats);
 	if (newstats == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -1077,7 +1098,7 @@
 			}
 			basechain->stats = stats;
 		} else {
-			stats = alloc_percpu(struct nft_stats);
+			stats = netdev_alloc_pcpu_stats(struct nft_stats);
 			if (IS_ERR(stats)) {
 				module_put(type->owner);
 				kfree(basechain);
@@ -1130,7 +1151,7 @@
 		goto err2;
 
 	table->use++;
-	list_add_tail(&chain->list, &table->chains);
+	list_add_tail_rcu(&chain->list, &table->chains);
 	return 0;
 err2:
 	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
@@ -1180,7 +1201,7 @@
 		return err;
 
 	table->use--;
-	list_del(&chain->list);
+	list_del_rcu(&chain->list);
 	return 0;
 }
 
@@ -1199,9 +1220,9 @@
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	if (type->family == NFPROTO_UNSPEC)
-		list_add_tail(&type->list, &nf_tables_expressions);
+		list_add_tail_rcu(&type->list, &nf_tables_expressions);
 	else
-		list_add(&type->list, &nf_tables_expressions);
+		list_add_rcu(&type->list, &nf_tables_expressions);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	return 0;
 }
@@ -1216,7 +1237,7 @@
 void nft_unregister_expr(struct nft_expr_type *type)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_del(&type->list);
+	list_del_rcu(&type->list);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 }
 EXPORT_SYMBOL_GPL(nft_unregister_expr);
@@ -1549,16 +1570,17 @@
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
-	u8 genctr = ACCESS_ONCE(net->nft.genctr);
-	u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
 
-	list_for_each_entry(afi, &net->nft.af_info, list) {
+	rcu_read_lock();
+	cb->seq = net->nft.base_seq;
+
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
 		if (family != NFPROTO_UNSPEC && family != afi->family)
 			continue;
 
-		list_for_each_entry(table, &afi->tables, list) {
-			list_for_each_entry(chain, &table->chains, list) {
-				list_for_each_entry(rule, &chain->rules, list) {
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			list_for_each_entry_rcu(chain, &table->chains, list) {
+				list_for_each_entry_rcu(rule, &chain->rules, list) {
 					if (!nft_rule_is_active(net, rule))
 						goto cont;
 					if (idx < s_idx)
@@ -1572,6 +1594,8 @@
 								      NLM_F_MULTI | NLM_F_APPEND,
 								      afi->family, table, chain, rule) < 0)
 						goto done;
+
+					nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 					idx++;
 				}
@@ -1579,9 +1603,7 @@
 		}
 	}
 done:
-	/* Invalidate this dump, a transition to the new generation happened */
-	if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
-		return -EBUSY;
+	rcu_read_unlock();
 
 	cb->args[0] = idx;
 	return skb->len;
@@ -1932,7 +1954,7 @@
 int nft_register_set(struct nft_set_ops *ops)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_add_tail(&ops->list, &nf_tables_set_ops);
+	list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	return 0;
 }
@@ -1941,7 +1963,7 @@
 void nft_unregister_set(struct nft_set_ops *ops)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_del(&ops->list);
+	list_del_rcu(&ops->list);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 }
 EXPORT_SYMBOL_GPL(nft_unregister_set);
@@ -2234,7 +2256,10 @@
 	if (cb->args[1])
 		return skb->len;
 
-	list_for_each_entry(set, &ctx->table->sets, list) {
+	rcu_read_lock();
+	cb->seq = ctx->net->nft.base_seq;
+
+	list_for_each_entry_rcu(set, &ctx->table->sets, list) {
 		if (idx < s_idx)
 			goto cont;
 		if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2242,11 +2267,13 @@
 			cb->args[0] = idx;
 			goto done;
 		}
+		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 		idx++;
 	}
 	cb->args[1] = 1;
 done:
+	rcu_read_unlock();
 	return skb->len;
 }
 
@@ -2260,7 +2287,10 @@
 	if (cb->args[1])
 		return skb->len;
 
-	list_for_each_entry(table, &ctx->afi->tables, list) {
+	rcu_read_lock();
+	cb->seq = ctx->net->nft.base_seq;
+
+	list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
 		if (cur_table) {
 			if (cur_table != table)
 				continue;
@@ -2269,7 +2299,7 @@
 		}
 		ctx->table = table;
 		idx = 0;
-		list_for_each_entry(set, &ctx->table->sets, list) {
+		list_for_each_entry_rcu(set, &ctx->table->sets, list) {
 			if (idx < s_idx)
 				goto cont;
 			if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2278,12 +2308,14 @@
 				cb->args[2] = (unsigned long) table;
 				goto done;
 			}
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 			idx++;
 		}
 	}
 	cb->args[1] = 1;
 done:
+	rcu_read_unlock();
 	return skb->len;
 }
 
@@ -2300,7 +2332,10 @@
 	if (cb->args[1])
 		return skb->len;
 
-	list_for_each_entry(afi, &net->nft.af_info, list) {
+	rcu_read_lock();
+	cb->seq = net->nft.base_seq;
+
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
 		if (cur_family) {
 			if (afi->family != cur_family)
 				continue;
@@ -2308,7 +2343,7 @@
 			cur_family = 0;
 		}
 
-		list_for_each_entry(table, &afi->tables, list) {
+		list_for_each_entry_rcu(table, &afi->tables, list) {
 			if (cur_table) {
 				if (cur_table != table)
 					continue;
@@ -2319,7 +2354,7 @@
 			ctx->table = table;
 			ctx->afi = afi;
 			idx = 0;
-			list_for_each_entry(set, &ctx->table->sets, list) {
+			list_for_each_entry_rcu(set, &ctx->table->sets, list) {
 				if (idx < s_idx)
 					goto cont;
 				if (nf_tables_fill_set(skb, ctx, set,
@@ -2330,6 +2365,7 @@
 					cb->args[3] = afi->family;
 					goto done;
 				}
+				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 				idx++;
 			}
@@ -2339,6 +2375,7 @@
 	}
 	cb->args[1] = 1;
 done:
+	rcu_read_unlock();
 	return skb->len;
 }
 
@@ -2597,7 +2634,7 @@
 	if (err < 0)
 		goto err2;
 
-	list_add_tail(&set->list, &table->sets);
+	list_add_tail_rcu(&set->list, &table->sets);
 	table->use++;
 	return 0;
 
@@ -2617,7 +2654,7 @@
 
 static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 {
-	list_del(&set->list);
+	list_del_rcu(&set->list);
 	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
 	nft_set_destroy(set);
 }
@@ -2652,7 +2689,7 @@
 	if (err < 0)
 		return err;
 
-	list_del(&set->list);
+	list_del_rcu(&set->list);
 	ctx.table->use--;
 	return 0;
 }
@@ -2704,14 +2741,14 @@
 	}
 bind:
 	binding->chain = ctx->chain;
-	list_add_tail(&binding->list, &set->bindings);
+	list_add_tail_rcu(&binding->list, &set->bindings);
 	return 0;
 }
 
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 			  struct nft_set_binding *binding)
 {
-	list_del(&binding->list);
+	list_del_rcu(&binding->list);
 
 	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
 	    !(set->flags & NFT_SET_INACTIVE))
@@ -3346,7 +3383,7 @@
 	struct nft_set *set;
 
 	/* Bump generation counter, invalidate any dump in progress */
-	net->nft.genctr++;
+	while (++net->nft.base_seq == 0);
 
 	/* A new generation has just started */
 	net->nft.gencursor = gencursor_next(net);
@@ -3491,12 +3528,12 @@
 				}
 				nft_trans_destroy(trans);
 			} else {
-				list_del(&trans->ctx.table->list);
+				list_del_rcu(&trans->ctx.table->list);
 			}
 			break;
 		case NFT_MSG_DELTABLE:
-			list_add_tail(&trans->ctx.table->list,
-				      &trans->ctx.afi->tables);
+			list_add_tail_rcu(&trans->ctx.table->list,
+					  &trans->ctx.afi->tables);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWCHAIN:
@@ -3507,7 +3544,7 @@
 				nft_trans_destroy(trans);
 			} else {
 				trans->ctx.table->use--;
-				list_del(&trans->ctx.chain->list);
+				list_del_rcu(&trans->ctx.chain->list);
 				if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
 				    trans->ctx.chain->flags & NFT_BASE_CHAIN) {
 					nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
@@ -3517,8 +3554,8 @@
 			break;
 		case NFT_MSG_DELCHAIN:
 			trans->ctx.table->use++;
-			list_add_tail(&trans->ctx.chain->list,
-				      &trans->ctx.table->chains);
+			list_add_tail_rcu(&trans->ctx.chain->list,
+					  &trans->ctx.table->chains);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWRULE:
@@ -3532,12 +3569,12 @@
 			break;
 		case NFT_MSG_NEWSET:
 			trans->ctx.table->use--;
-			list_del(&nft_trans_set(trans)->list);
+			list_del_rcu(&nft_trans_set(trans)->list);
 			break;
 		case NFT_MSG_DELSET:
 			trans->ctx.table->use++;
-			list_add_tail(&nft_trans_set(trans)->list,
-				      &trans->ctx.table->sets);
+			list_add_tail_rcu(&nft_trans_set(trans)->list,
+					  &trans->ctx.table->sets);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
@@ -3951,6 +3988,7 @@
 {
 	INIT_LIST_HEAD(&net->nft.af_info);
 	INIT_LIST_HEAD(&net->nft.commit_list);
+	net->nft.base_seq = 1;
 	return 0;
 }
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 345acfb..3b90eb2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -109,7 +109,7 @@
 	struct nft_data data[NFT_REG_MAX + 1];
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
-	struct nft_stats __percpu *stats;
+	struct nft_stats *stats;
 	int rulenum;
 	/*
 	 * Cache cursor to avoid problems in case that the cursor is updated
@@ -205,9 +205,11 @@
 		nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
 
 	rcu_read_lock_bh();
-	stats = rcu_dereference(nft_base_chain(basechain)->stats);
-	__this_cpu_inc(stats->pkts);
-	__this_cpu_add(stats->bytes, pkt->skb->len);
+	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
+	u64_stats_update_begin(&stats->syncp);
+	stats->pkts++;
+	stats->bytes += pkt->skb->len;
+	u64_stats_update_end(&stats->syncp);
 	rcu_read_unlock_bh();
 
 	return nft_base_chain(basechain)->policy;