cpumask: make for_each_cpu_mask a bit smaller

The for_each_cpu_mask loop is used quite often in the kernel. It
makes use of two functions: first_cpu and next_cpu. This patch
changes for_each_cpu_mask to use only the latter. Because next_cpu
finds the next eligible cpu _after_ the given one, the iteration
variable has to be initialized to -1 and next_cpu has to be
called with this value before the first iteration. An x86_64
defconfig kernel (from sched/latest) is about 2500 bytes smaller
with this patch applied:

   text	   data	    bss	    dec	    hex	filename
6222517	 917952	 749932	7890401	 7865e1	vmlinux.orig
6219922	 917952	 749932	7887806	 785bbe	vmlinux

The same size reduction is seen for defconfig+MAXSMP

   text	   data	    bss	    dec	    hex	filename
6241772	2563968	1492716	10298456	 9d2458	vmlinux.orig
6239211	2563968	1492716	10295895	 9d1a57	vmlinux

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index b49472d..80226e7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -370,10 +370,10 @@
 #define first_cpu(src)		__first_cpu(&(src))
 #define next_cpu(n, src)	__next_cpu((n), &(src))
 #define any_online_cpu(mask) __any_online_cpu(&(mask))
-#define for_each_cpu_mask(cpu, mask)		\
-	for ((cpu) = first_cpu(mask);		\
-		(cpu) < NR_CPUS;		\
-		(cpu) = next_cpu((cpu), (mask)))
+#define for_each_cpu_mask(cpu, mask)			\
+	for ((cpu) = -1;				\
+		(cpu) = next_cpu((cpu), (mask)),	\
+		(cpu) < NR_CPUS; )
 #endif
 
 #if NR_CPUS <= 64
@@ -387,10 +387,10 @@
 int __next_cpu_nr(int n, const cpumask_t *srcp);
 #define next_cpu_nr(n, src)	__next_cpu_nr((n), &(src))
 #define cpus_weight_nr(cpumask)	__cpus_weight(&(cpumask), nr_cpu_ids)
-#define for_each_cpu_mask_nr(cpu, mask)		\
-	for ((cpu) = first_cpu(mask);		\
-		(cpu) < nr_cpu_ids;		\
-		(cpu) = next_cpu_nr((cpu), (mask)))
+#define for_each_cpu_mask_nr(cpu, mask)			\
+	for ((cpu) = -1;				\
+		(cpu) = next_cpu_nr((cpu), (mask)),	\
+		(cpu) < nr_cpu_ids; )
 
 #endif /* NR_CPUS > 64 */