[PATCH] powerpc: G4+ oprofile support

This patch adds oprofile support for the 7450 and all its multitudinous
derivatives.

* Added 7450 (and derivatives) support for oprofile
* Changed e500 cputable to have oprofile model and cpu_type fields
* Added support for classic 32-bit performance monitor interrupt
* Cleaned up common powerpc oprofile code to be as common as possible
* Cleaned up oprofile_impl.h to reflect 32-bit classic code
* Added 32-bit MMCRx bitfield definitions and SPR numbers

Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1d85ced..f7f2a83 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -545,7 +545,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7450 2.1 */
 		.pvr_mask		= 0xffffffff,
@@ -556,7 +560,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7450 2.3 and newer */
 		.pvr_mask		= 0xffff0000,
@@ -567,7 +575,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7455 rev 1.x */
 		.pvr_mask		= 0xffffff00,
@@ -578,7 +590,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7455 rev 2.0 */
 		.pvr_mask		= 0xffffffff,
@@ -589,7 +605,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7455 others */
 		.pvr_mask		= 0xffff0000,
@@ -600,7 +620,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7447/7457 Rev 1.0 */
 		.pvr_mask		= 0xffffffff,
@@ -611,7 +635,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7447/7457 Rev 1.1 */
 		.pvr_mask		= 0xffffffff,
@@ -622,7 +650,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7447/7457 Rev 1.2 and later */
 		.pvr_mask		= 0xffff0000,
@@ -633,7 +665,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7447A */
 		.pvr_mask		= 0xffff0000,
@@ -644,7 +680,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 7448 */
 		.pvr_mask		= 0xffff0000,
@@ -655,7 +695,11 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_745x
+		.cpu_setup		= __setup_cpu_745x,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type      = "ppc/7450",
+		.oprofile_model         = &op_model_7450,
+#endif
 	},
 	{	/* 82xx (8240, 8245, 8260 are all 603e cores) */
 		.pvr_mask		= 0x7fff0000,
@@ -979,6 +1023,10 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 4,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type	= "ppc/e500",
+		.oprofile_model		= &op_model_fsl_booke,
+#endif
 	},
 	{	/* e500v2 */
 		.pvr_mask		= 0xffff0000,
@@ -992,6 +1040,10 @@
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.num_pmcs		= 4,
+#ifdef CONFIG_OPROFILE
+		.oprofile_cpu_type	= "ppc/e500",
+		.oprofile_model		= &op_model_fsl_booke,
+#endif
 	},
 #endif
 #if !CLASSIC_PPC
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 6359e36..bf37ef2 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -466,16 +466,11 @@
  * by executing an altivec instruction.
  */
 	. = 0xf00
-	b	Trap_0f
+	b	PerformanceMonitor
 
 	. = 0xf20
 	b	AltiVecUnavailable
 
-Trap_0f:
-	EXCEPTION_PROLOG
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_EE(0xf00, unknown_exception)
-
 /*
  * Handle TLB miss for instruction on 603/603e.
  * Note: we get an alternate set of r0 - r3 to use automatically.
@@ -719,6 +714,11 @@
 #endif /* CONFIG_ALTIVEC */
 	EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
 
+PerformanceMonitor:
+	EXCEPTION_PROLOG
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_STD(0xf00, performance_monitor_exception)
+
 #ifdef CONFIG_ALTIVEC
 /* Note that the AltiVec support is closely modeled after the FP
  * support.  Changes to one are likely to be applicable to the
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 2d333cc..e6fb194 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -43,8 +43,13 @@
 	mtspr(SPRN_MMCR0, mmcr0);
 }
 #else
+/* Ensure performance monitor exceptions are disabled */
 static void dummy_perf(struct pt_regs *regs)
 {
+	unsigned int mmcr0 = mfspr(SPRN_MMCR0);
+
+	mmcr0 &= ~(MMCR0_PMXE);
+	mtspr(SPRN_MMCR0, mmcr0);
 }
 #endif
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 76b579c..6c79346 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -901,12 +901,10 @@
 	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
 }
 
-#if defined(CONFIG_PPC64) || defined(CONFIG_E500)
 void performance_monitor_exception(struct pt_regs *regs)
 {
 	perf_irq(regs);
 }
-#endif
 
 #ifdef CONFIG_8xx
 void SoftwareEmulation(struct pt_regs *regs)
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 0782d0c..554cd7c 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -9,3 +9,4 @@
 oprofile-y := $(DRIVER_OBJS) common.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
+oprofile-$(CONFIG_PPC32) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index af2c05d..a370778 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -14,9 +14,6 @@
  */
 
 #include <linux/oprofile.h>
-#ifndef __powerpc64__
-#include <linux/slab.h>
-#endif /* ! __powerpc64__ */
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/errno.h>
@@ -31,10 +28,6 @@
 static struct op_counter_config ctr[OP_MAX_COUNTER];
 static struct op_system_config sys;
 
-#ifndef __powerpc64__
-static char *cpu_type;
-#endif /* ! __powerpc64__ */
-
 static void op_handle_interrupt(struct pt_regs *regs)
 {
 	model->handle_interrupt(regs, ctr);
@@ -53,14 +46,7 @@
 	model->reg_setup(ctr, &sys, model->num_counters);
 
 	/* Configure the registers on all cpus.  */
-#ifdef __powerpc64__
 	on_each_cpu(model->cpu_setup, NULL, 0, 1);
-#else /* __powerpc64__ */
-#if 0
-	/* FIXME: Make multi-cpu work */
-	on_each_cpu(model->reg_setup, NULL, 0, 1);
-#endif
-#endif /* __powerpc64__ */
 
 	return 0;
 }
@@ -95,7 +81,7 @@
 {
 	int i;
 
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC64
 	/*
 	 * There is one mmcr0, mmcr1 and mmcra for setting the events for
 	 * all of the counters.
@@ -103,7 +89,7 @@
 	oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
 	oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
 	oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
-#endif /* __powerpc64__ */
+#endif
 
 	for (i = 0; i < model->num_counters; ++i) {
 		struct dentry *dir;
@@ -115,65 +101,46 @@
 		oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
 		oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
 		oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
-#ifdef __powerpc64__
+
 		/*
-		 * We dont support per counter user/kernel selection, but
-		 * we leave the entries because userspace expects them
+		 * Classic PowerPC doesn't support per-counter
+		 * control like this, but the options are
+		 * expected, so they remain.  For Freescale
+		 * Book-E style performance monitors, we do
+		 * support them.
 		 */
-#endif /* __powerpc64__ */
 		oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
 		oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
 
-#ifndef __powerpc64__
-		/* FIXME: Not sure if this is used */
-#endif /* ! __powerpc64__ */
 		oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
 	}
 
 	oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel);
 	oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user);
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC64
 	oprofilefs_create_ulong(sb, root, "backtrace_spinlocks",
 				&sys.backtrace_spinlocks);
-#endif /* __powerpc64__ */
+#endif
 
 	/* Default to tracing both kernel and user */
 	sys.enable_kernel = 1;
 	sys.enable_user = 1;
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC64
 	/* Turn on backtracing through spinlocks by default */
 	sys.backtrace_spinlocks = 1;
-#endif /* __powerpc64__ */
+#endif
 
 	return 0;
 }
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-#ifndef __powerpc64__
-#ifdef CONFIG_FSL_BOOKE
-	model = &op_model_fsl_booke;
-#else
-	return -ENODEV;
-#endif
-
-	cpu_type = kmalloc(32, GFP_KERNEL);
-	if (NULL == cpu_type)
-		return -ENOMEM;
-
-	sprintf(cpu_type, "ppc/%s", cur_cpu_spec->cpu_name);
-
-	model->num_counters = cur_cpu_spec->num_pmcs;
-
-	ops->cpu_type = cpu_type;
-#else /* __powerpc64__ */
 	if (!cur_cpu_spec->oprofile_model || !cur_cpu_spec->oprofile_cpu_type)
 		return -ENODEV;
 	model = cur_cpu_spec->oprofile_model;
 	model->num_counters = cur_cpu_spec->num_pmcs;
 
 	ops->cpu_type = cur_cpu_spec->oprofile_cpu_type;
-#endif /* __powerpc64__ */
 	ops->create_files = op_powerpc_create_files;
 	ops->setup = op_powerpc_setup;
 	ops->shutdown = op_powerpc_shutdown;
@@ -188,8 +155,4 @@
 
 void oprofile_arch_exit(void)
 {
-#ifndef __powerpc64__
-	kfree(cpu_type);
-	cpu_type = NULL;
-#endif /* ! __powerpc64__ */
 }
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
new file mode 100644
index 0000000..32abfdb
--- /dev/null
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -0,0 +1,206 @@
+/*
+ * oprofile/op_model_7450.c
+ *
+ * Freescale 745x/744x oprofile support, based on fsl_booke support
+ * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc
+ *
+ * Author: Andy Fleming
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/pmc.h>
+#include <asm/oprofile_impl.h>
+
+static unsigned long reset_value[OP_MAX_COUNTER];
+
+static int oprofile_running;
+static u32 mmcr0_val, mmcr1_val, mmcr2_val;
+
+#define MMCR0_PMC1_SHIFT	6
+#define MMCR0_PMC2_SHIFT	0
+#define MMCR1_PMC3_SHIFT	27
+#define MMCR1_PMC4_SHIFT	22
+#define MMCR1_PMC5_SHIFT	17
+#define MMCR1_PMC6_SHIFT	11
+
+#define mmcr0_event1(event) \
+	((event << MMCR0_PMC1_SHIFT) & MMCR0_PMC1SEL)
+#define mmcr0_event2(event) \
+	((event << MMCR0_PMC2_SHIFT) & MMCR0_PMC2SEL)
+
+#define mmcr1_event3(event) \
+	((event << MMCR1_PMC3_SHIFT) & MMCR1_PMC3SEL)
+#define mmcr1_event4(event) \
+	((event << MMCR1_PMC4_SHIFT) & MMCR1_PMC4SEL)
+#define mmcr1_event5(event) \
+	((event << MMCR1_PMC5_SHIFT) & MMCR1_PMC5SEL)
+#define mmcr1_event6(event) \
+	((event << MMCR1_PMC6_SHIFT) & MMCR1_PMC6SEL)
+
+#define MMCR0_INIT (MMCR0_FC | MMCR0_FCS | MMCR0_FCP | MMCR0_FCM1 | MMCR0_FCM0)
+
+/* Unfreezes the counters on this CPU, enables the interrupt,
+ * enables the counters to trigger the interrupt, and sets the
+ * counters to only count when the mark bit is not set.
+ */
+static void pmc_start_ctrs(void)
+{
+	u32 mmcr0 = mfspr(SPRN_MMCR0);
+
+	mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
+	mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
+
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/* Disables the counters on this CPU, and freezes them */
+static void pmc_stop_ctrs(void)
+{
+	u32 mmcr0 = mfspr(SPRN_MMCR0);
+
+	mmcr0 |= MMCR0_FC;
+	mmcr0 &= ~(MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
+
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/* Configures the counters on this CPU based on the global
+ * settings */
+static void fsl7450_cpu_setup(void *unused)
+{
+	/* freeze all counters */
+	pmc_stop_ctrs();
+
+	mtspr(SPRN_MMCR0, mmcr0_val);
+	mtspr(SPRN_MMCR1, mmcr1_val);
+	mtspr(SPRN_MMCR2, mmcr2_val);
+}
+
+#define NUM_CTRS 6
+
+/* Configures the global settings for the counters on all CPUs. */
+static void fsl7450_reg_setup(struct op_counter_config *ctr,
+			     struct op_system_config *sys,
+			     int num_ctrs)
+{
+	int i;
+
+	/* Our counters count up, and "count" refers to
+	 * how much before the next interrupt, and we interrupt
+	 * on overflow.  So we calculate the starting value
+	 * which will give us "count" until overflow.
+	 * Then we set the events on the enabled counters */
+	for (i = 0; i < NUM_CTRS; ++i)
+		reset_value[i] = 0x80000000UL - ctr[i].count;
+
+	/* Set events for Counters 1 & 2 */
+	mmcr0_val = MMCR0_INIT | mmcr0_event1(ctr[0].event)
+		| mmcr0_event2(ctr[1].event);
+
+	/* Setup user/kernel bits */
+	if (sys->enable_kernel)
+		mmcr0_val &= ~(MMCR0_FCS);
+
+	if (sys->enable_user)
+		mmcr0_val &= ~(MMCR0_FCP);
+
+	/* Set events for Counters 3-6 */
+	mmcr1_val = mmcr1_event3(ctr[2].event)
+		| mmcr1_event4(ctr[3].event)
+		| mmcr1_event5(ctr[4].event)
+		| mmcr1_event6(ctr[5].event);
+
+	mmcr2_val = 0;
+}
+
+/* Sets the counters on this CPU to the chosen values, and starts them */
+static void fsl7450_start(struct op_counter_config *ctr)
+{
+	int i;
+
+	mtmsr(mfmsr() | MSR_PMM);
+
+	for (i = 0; i < NUM_CTRS; ++i) {
+		if (ctr[i].enabled)
+			ctr_write(i, reset_value[i]);
+		else
+			ctr_write(i, 0);
+	}
+
+	/* Clear the freeze bit, and enable the interrupt.
+	 * The counters won't actually start until the rfi clears
+	 * the PMM bit */
+	pmc_start_ctrs();
+
+	oprofile_running = 1;
+}
+
+/* Stop the counters on this CPU */
+static void fsl7450_stop(void)
+{
+	/* freeze counters */
+	pmc_stop_ctrs();
+
+	oprofile_running = 0;
+
+	mb();
+}
+
+
+/* Handle the interrupt on this CPU, and log a sample for each
+ * event that triggered the interrupt */
+static void fsl7450_handle_interrupt(struct pt_regs *regs,
+				    struct op_counter_config *ctr)
+{
+	unsigned long pc;
+	int is_kernel;
+	int val;
+	int i;
+
+	/* set the PMM bit (see comment below) */
+	mtmsr(mfmsr() | MSR_PMM);
+
+	pc = mfspr(SPRN_SIAR);
+	is_kernel = (pc >= KERNELBASE);
+
+	for (i = 0; i < NUM_CTRS; ++i) {
+		val = ctr_read(i);
+		if (val < 0) {
+			if (oprofile_running && ctr[i].enabled) {
+				oprofile_add_pc(pc, is_kernel, i);
+				ctr_write(i, reset_value[i]);
+			} else {
+				ctr_write(i, 0);
+			}
+		}
+	}
+
+	/* The freeze bit was set by the interrupt. */
+	/* Clear the freeze bit, and reenable the interrupt.
+	 * The counters won't actually start until the rfi clears
+	 * the PMM bit */
+	pmc_start_ctrs();
+}
+
+struct op_powerpc_model op_model_7450= {
+	.reg_setup		= fsl7450_reg_setup,
+	.cpu_setup		= fsl7450_cpu_setup,
+	.start			= fsl7450_start,
+	.stop			= fsl7450_stop,
+	.handle_interrupt	= fsl7450_handle_interrupt,
+};