netfilter: xt_rateest: Better struct xt_rateest layout

We currently dirty two cache lines in struct xt_rateest, which hurts SMP
performance.

This patch moves lock/bstats/rstats to the beginning of the structure so
that they share a single cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e..b1d780e 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -2,13 +2,17 @@
 #define _XT_RATEEST_H
 
 struct xt_rateest {
+	/* keep lock and bstats on the same cache line to speed up xt_rateest_tg() */
+	struct gnet_stats_basic_packed	bstats;
+	spinlock_t			lock;
+	/* keep rstats and lock on the same cache line to speed up xt_rateest_mt() */
+	struct gnet_stats_rate_est	rstats;
+
+	/* following fields not accessed in hot path */
 	struct hlist_node		list;
 	char				name[IFNAMSIZ];
 	unsigned int			refcnt;
-	spinlock_t			lock;
 	struct gnet_estimator		params;
-	struct gnet_stats_rate_est	rstats;
-	struct gnet_stats_basic_packed	bstats;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);