MIPS: Collect FPU emulator statistics per-CPU.

On SMP systems, the collection of statistics can cause cache line
bouncing in the lines associated with the counters.  Also there are
races incrementing the counters on multiple CPUs.

To fix both problems, we collect the statistics in per-CPU variables,
and add them up in the debugfs read operation.

As a test I ran the LTP float_bessel test on a 12 CPU Octeon system.

Without CONFIG_DEBUG_FS :             2602 seconds.
With CONFIG_DEBUG_FS:                 2640 seconds.
With non-cpu-local atomic statistics: 14569 seconds.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 454b539..8f2f8e9 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -35,6 +35,7 @@
  * better performance by compiling with -msoft-float!
  */
 #include <linux/sched.h>
+#include <linux/module.h>
 #include <linux/debugfs.h>
 
 #include <asm/inst.h>
@@ -68,7 +69,9 @@
 
 /* Further private data for which no space exists in mips_fpu_struct */
 
-struct mips_fpu_emulator_stats fpuemustats;
+#ifdef CONFIG_DEBUG_FS
+DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
+#endif
 
 /* Control registers */
 
@@ -209,7 +212,7 @@
 	unsigned int cond;
 
 	if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) {
-		fpuemustats.errors++;
+		MIPS_FPU_EMU_INC_STATS(errors);
 		return SIGBUS;
 	}
 
@@ -240,7 +243,7 @@
 			return SIGILL;
 		}
 		if (get_user(ir, (mips_instruction __user *) emulpc)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		/* __compute_return_epc() will have updated cp0_epc */
@@ -253,16 +256,16 @@
 	}
 
       emul:
-	fpuemustats.emulated++;
+	MIPS_FPU_EMU_INC_STATS(emulated);
 	switch (MIPSInst_OPCODE(ir)) {
 	case ldc1_op:{
 		u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] +
 			MIPSInst_SIMM(ir));
 		u64 val;
 
-		fpuemustats.loads++;
+		MIPS_FPU_EMU_INC_STATS(loads);
 		if (get_user(val, va)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		DITOREG(val, MIPSInst_RT(ir));
@@ -274,10 +277,10 @@
 			MIPSInst_SIMM(ir));
 		u64 val;
 
-		fpuemustats.stores++;
+		MIPS_FPU_EMU_INC_STATS(stores);
 		DIFROMREG(val, MIPSInst_RT(ir));
 		if (put_user(val, va)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		break;
@@ -288,9 +291,9 @@
 			MIPSInst_SIMM(ir));
 		u32 val;
 
-		fpuemustats.loads++;
+		MIPS_FPU_EMU_INC_STATS(loads);
 		if (get_user(val, va)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		SITOREG(val, MIPSInst_RT(ir));
@@ -302,10 +305,10 @@
 			MIPSInst_SIMM(ir));
 		u32 val;
 
-		fpuemustats.stores++;
+		MIPS_FPU_EMU_INC_STATS(stores);
 		SIFROMREG(val, MIPSInst_RT(ir));
 		if (put_user(val, va)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		break;
@@ -429,7 +432,7 @@
 
 				if (get_user(ir,
 				    (mips_instruction __user *) xcp->cp0_epc)) {
-					fpuemustats.errors++;
+					MIPS_FPU_EMU_INC_STATS(errors);
 					return SIGBUS;
 				}
 
@@ -595,7 +598,7 @@
 {
 	unsigned rcsr = 0;	/* resulting csr */
 
-	fpuemustats.cp1xops++;
+	MIPS_FPU_EMU_INC_STATS(cp1xops);
 
 	switch (MIPSInst_FMA_FFMT(ir)) {
 	case s_fmt:{		/* 0 */
@@ -610,9 +613,9 @@
 			va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
 				xcp->regs[MIPSInst_FT(ir)]);
 
-			fpuemustats.loads++;
+			MIPS_FPU_EMU_INC_STATS(loads);
 			if (get_user(val, va)) {
-				fpuemustats.errors++;
+				MIPS_FPU_EMU_INC_STATS(errors);
 				return SIGBUS;
 			}
 			SITOREG(val, MIPSInst_FD(ir));
@@ -622,11 +625,11 @@
 			va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
 				xcp->regs[MIPSInst_FT(ir)]);
 
-			fpuemustats.stores++;
+			MIPS_FPU_EMU_INC_STATS(stores);
 
 			SIFROMREG(val, MIPSInst_FS(ir));
 			if (put_user(val, va)) {
-				fpuemustats.errors++;
+				MIPS_FPU_EMU_INC_STATS(errors);
 				return SIGBUS;
 			}
 			break;
@@ -687,9 +690,9 @@
 			va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
 				xcp->regs[MIPSInst_FT(ir)]);
 
-			fpuemustats.loads++;
+			MIPS_FPU_EMU_INC_STATS(loads);
 			if (get_user(val, va)) {
-				fpuemustats.errors++;
+				MIPS_FPU_EMU_INC_STATS(errors);
 				return SIGBUS;
 			}
 			DITOREG(val, MIPSInst_FD(ir));
@@ -699,10 +702,10 @@
 			va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] +
 				xcp->regs[MIPSInst_FT(ir)]);
 
-			fpuemustats.stores++;
+			MIPS_FPU_EMU_INC_STATS(stores);
 			DIFROMREG(val, MIPSInst_FS(ir));
 			if (put_user(val, va)) {
-				fpuemustats.errors++;
+				MIPS_FPU_EMU_INC_STATS(errors);
 				return SIGBUS;
 			}
 			break;
@@ -769,7 +772,7 @@
 #endif
 	} rv;			/* resulting value */
 
-	fpuemustats.cp1ops++;
+	MIPS_FPU_EMU_INC_STATS(cp1ops);
 	switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) {
 	case s_fmt:{		/* 0 */
 		union {
@@ -1240,7 +1243,7 @@
 		prevepc = xcp->cp0_epc;
 
 		if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) {
-			fpuemustats.errors++;
+			MIPS_FPU_EMU_INC_STATS(errors);
 			return SIGBUS;
 		}
 		if (insn == 0)
@@ -1276,33 +1279,50 @@
 }
 
 #ifdef CONFIG_DEBUG_FS
+
+static int fpuemu_stat_get(void *data, u64 *val)
+{
+	int cpu;
+	unsigned long sum = 0;
+	for_each_online_cpu(cpu) {
+		struct mips_fpu_emulator_stats *ps;
+		local_t *pv;
+		ps = &per_cpu(fpuemustats, cpu);
+		pv = (void *)ps + (unsigned long)data;
+		sum += local_read(pv);
+	}
+	*val = sum;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
+
 extern struct dentry *mips_debugfs_dir;
 static int __init debugfs_fpuemu(void)
 {
 	struct dentry *d, *dir;
-	int i;
-	static struct {
-		const char *name;
-		unsigned int *v;
-	} vars[] __initdata = {
-		{ "emulated", &fpuemustats.emulated },
-		{ "loads",    &fpuemustats.loads },
-		{ "stores",   &fpuemustats.stores },
-		{ "cp1ops",   &fpuemustats.cp1ops },
-		{ "cp1xops",  &fpuemustats.cp1xops },
-		{ "errors",   &fpuemustats.errors },
-	};
 
 	if (!mips_debugfs_dir)
 		return -ENODEV;
 	dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
 	if (!dir)
 		return -ENOMEM;
-	for (i = 0; i < ARRAY_SIZE(vars); i++) {
-		d = debugfs_create_u32(vars[i].name, S_IRUGO, dir, vars[i].v);
-		if (!d)
-			return -ENOMEM;
-	}
+
+#define FPU_STAT_CREATE(M)						\
+	do {								\
+		d = debugfs_create_file(#M , S_IRUGO, dir,		\
+			(void *)offsetof(struct mips_fpu_emulator_stats, M), \
+			&fops_fpuemu_stat);				\
+		if (!d)							\
+			return -ENOMEM;					\
+	} while (0)
+
+	FPU_STAT_CREATE(emulated);
+	FPU_STAT_CREATE(loads);
+	FPU_STAT_CREATE(stores);
+	FPU_STAT_CREATE(cp1ops);
+	FPU_STAT_CREATE(cp1xops);
+	FPU_STAT_CREATE(errors);
+
 	return 0;
 }
 __initcall(debugfs_fpuemu);