[NET]: NET_CLS_ROUTE : convert ip_rt_acct to per_cpu variables

ip_rt_acct needs 4096 bytes per cpu to perform some accounting.
It is actually allocated as a single huge array [4096*NR_CPUS]
(rounded up to a power of two)

Converting it to a per cpu variable is wanted to :
 - Save space on machines were num_possible_cpus() < NR_CPUS
 - Better NUMA placement (each cpu gets memory on its node)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94ef788..a21021b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2858,12 +2858,10 @@
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
-struct ip_rt_acct *ip_rt_acct;
-
-/* This code sucks.  But you should have seen it before! --RR */
+struct ip_rt_acct *ip_rt_acct __read_mostly;
 
 /* IP route accounting ptr for this logical cpu number. */
-#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
+#define IP_RT_ACCT_CPU(cpu) (per_cpu_ptr(ip_rt_acct, cpu))
 
 #ifdef CONFIG_PROC_FS
 static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
@@ -2923,16 +2921,9 @@
 			     (jiffies ^ (jiffies >> 7)));
 
 #ifdef CONFIG_NET_CLS_ROUTE
-	{
-	int order;
-	for (order = 0;
-	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
-		/* NOTHING */;
-	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
+	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
-	memset(ip_rt_acct, 0, PAGE_SIZE << order);
-	}
 #endif
 
 	ipv4_dst_ops.kmem_cachep =