ARM: perf: add L2x0 PMU driver

This patch adds initial support for the PMU featured in the L2x0
series of Level 2 Cache Controllers, supporting L220 & PL310.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Change-Id: I04d54f5b2586264ac3325a0ed19c452d25f8fd4c
[ashwinc@codeaurora.org: Remove unused functions for passing compilation.]
Signed-off-by: Ashwin Chaugule <ashwinc@codeaurora.org>
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 926ac0e..5dc9a66 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -69,6 +69,7 @@
 #define L2X0_CACHE_ID_REV_MASK		(0x3f)
 #define L2X0_CACHE_ID_PART_MASK		(0xf << 6)
 #define L2X0_CACHE_ID_PART_L210		(1 << 6)
+#define L2X0_CACHE_ID_PART_L220         (2 << 6)
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
 #define L2X0_CACHE_ID_RTL_MASK          0x3f
 #define L2X0_CACHE_ID_RTL_R0P0          0x0
@@ -104,6 +105,7 @@
 #define L2X0_LATENCY_CTRL_WR_SHIFT	8
 
 #define L2X0_ADDR_FILTER_EN		1
+#define L2X0_INTR_MASK_ECNTR           1
 
 #define REV_PL310_R2P0				4
 
@@ -146,6 +148,49 @@
 
 extern struct l2x0_regs l2x0_saved_regs;
 
+#ifdef CONFIG_HW_PERF_EVENTS
+/* L220/PL310 Event control register values; RESET(x) is bit x + 1 */
+#define L2X0_EVENT_CNT_ENABLE_MASK             1
+#define L2X0_EVENT_CNT_ENABLE                  1
+#define L2X0_EVENT_CNT_RESET(x)                (1 << ((x) + 1))
+
+/* Bit-shifted event counter config values */
+enum l2x0_perf_types {
+	L2X0_EVENT_CNT_CFG_DISABLED             = 0x0,
+	L2X0_EVENT_CNT_CFG_CO                   = 0x1,
+	L2X0_EVENT_CNT_CFG_DRHIT                = 0x2,
+	L2X0_EVENT_CNT_CFG_DRREQ                = 0x3,
+	L2X0_EVENT_CNT_CFG_DWHIT                = 0x4,
+	L2X0_EVENT_CNT_CFG_DWREQ                = 0x5,
+	L2X0_EVENT_CNT_CFG_DWTREQ               = 0x6,
+	L2X0_EVENT_CNT_CFG_IRHIT                = 0x7,
+	L2X0_EVENT_CNT_CFG_IRREQ                = 0x8,
+	L2X0_EVENT_CNT_CFG_WA                   = 0x9,
+
+	/* PL310 only */
+	L2X0_EVENT_CNT_CFG_IPFALLOC             = 0xA,
+	L2X0_EVENT_CNT_CFG_EPFHIT               = 0xB,
+	L2X0_EVENT_CNT_CFG_EPFALLOC             = 0xC,
+	L2X0_EVENT_CNT_CFG_SRRCVD               = 0xD,
+	L2X0_EVENT_CNT_CFG_SRCONF               = 0xE,
+	L2X0_EVENT_CNT_CFG_EPFRCVD              = 0xF,
+};
+
+#define L220_EVENT_CNT_CFG_MAX                 L2X0_EVENT_CNT_CFG_WA
+#define PL310_EVENT_CNT_CFG_MAX                L2X0_EVENT_CNT_CFG_EPFRCVD
+
+#define L2X0_EVENT_CNT_CFG_SHIFT               2
+#define L2X0_EVENT_CNT_CFG_MASK                (0xF << 2)
+
+#define L2X0_EVENT_CNT_CFG_INTR_MASK           0x3
+#define L2X0_EVENT_CNT_CFG_INTR_DISABLED       0x0
+#define L2X0_EVENT_CNT_CFG_INTR_INCREMENT      0x1
+#define L2X0_EVENT_CNT_CFG_INTR_OVERFLOW       0x2
+
+#define L2X0_NUM_COUNTERS                      2
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index a40f81e..4f41fd6 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -22,6 +22,7 @@
 	ARM_PERF_PMU_ID_CA9,
 	ARM_PERF_PMU_ID_CA5,
 	ARM_PERF_PMU_ID_CA15,
+	ARM_PERF_PMU_ID_L2X0,
 	ARM_PERF_PMU_ID_CA7,
 	ARM_PERF_PMU_ID_SCORPION,
 	ARM_PERF_PMU_ID_SCORPIONMP,
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index bb4da0f..adab76d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -23,9 +23,12 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/platform_device.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
 
 #define CACHE_LINE_SIZE		32
 
@@ -653,3 +656,315 @@
 	pl310_resume();
 	dmb();
 }
+
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * L220/PL310 PMU-specific functionality.
+ * TODO: Put this in a separate file and get the l2x0 driver to register
+ * the PMU from l2x0_{of}_init.
+ */
+
+static struct arm_pmu l2x0_pmu;
+
+/* Raw event limit; default covers events common to L220 and PL310. */
+static u64 l2x0pmu_max_event_id = L220_EVENT_CNT_CFG_MAX;
+static struct perf_event *events[L2X0_NUM_COUNTERS];
+static unsigned long used_mask[BITS_TO_LONGS(L2X0_NUM_COUNTERS)];
+static struct pmu_hw_events hw_events = {
+	.events = events,
+	.used_mask = used_mask,
+	.pmu_lock = __RAW_SPIN_LOCK_UNLOCKED(hw_events.pmu_lock),
+};
+
+/* Counter idx's registers sit 4 * idx bytes below the counter 0 ones. */
+#define COUNTER_CFG_ADDR(idx)	(l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * (idx))
+#define COUNTER_ADDR(idx)	(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * (idx))
+
+#define COUNTER_CTRL_ADDR	(l2x0_base + L2X0_EVENT_CNT_CTRL)
+
+static u32 l2x0_read_intr_mask(void)
+{
+	return readl_relaxed(l2x0_base + L2X0_INTR_MASK);
+}
+
+static void l2x0_write_intr_mask(u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_INTR_MASK);
+}
+
+/* Set the event counter bit in the L2X0 interrupt mask register. */
+static void l2x0_enable_counter_interrupt(void)
+{
+	u32 mask = l2x0_read_intr_mask() | L2X0_INTR_MASK_ECNTR;
+	l2x0_write_intr_mask(mask);
+}
+
+/* Clear the event counter bit in the L2X0 interrupt mask register. */
+static void l2x0_disable_counter_interrupt(void)
+{
+	u32 mask = l2x0_read_intr_mask() & ~L2X0_INTR_MASK_ECNTR;
+	l2x0_write_intr_mask(mask);
+}
+
+static void l2x0_clear_interrupts(u32 flags)
+{
+	writel_relaxed(flags, l2x0_base + L2X0_INTR_CLEAR);
+}
+
+static struct pmu_hw_events *l2x0pmu_get_hw_events(void)
+{
+	return &hw_events;
+}
+
+static u32 l2x0pmu_read_ctrl(void)
+{
+	return readl_relaxed(COUNTER_CTRL_ADDR);
+}
+
+static void l2x0pmu_write_ctrl(u32 val)
+{
+	writel_relaxed(val, COUNTER_CTRL_ADDR);
+}
+
+static u32 l2x0pmu_read_cfg(int idx)
+{
+	return readl_relaxed(COUNTER_CFG_ADDR(idx));
+}
+
+static void l2x0pmu_write_cfg(u32 val, int idx)
+{
+	writel_relaxed(val, COUNTER_CFG_ADDR(idx));
+}
+
+/* Write cfg to counter idx's config with the overflow interrupt bit set. */
+static void l2x0pmu_enable_counter(u32 cfg, int idx)
+{
+	l2x0pmu_write_cfg(cfg | L2X0_EVENT_CNT_CFG_INTR_OVERFLOW, idx);
+}
+
+/* Clear counter idx's event and interrupt config; returns the old config. */
+static u32 l2x0pmu_disable_counter(int idx)
+{
+	const u32 clear = L2X0_EVENT_CNT_CFG_MASK |
+			  L2X0_EVENT_CNT_CFG_INTR_MASK;
+	u32 prev = l2x0pmu_read_cfg(idx);
+
+	l2x0pmu_write_cfg(prev & ~clear, idx);
+
+	return prev;
+}
+
+static u32 l2x0pmu_read_counter(int idx)
+{
+	return readl_relaxed(COUNTER_ADDR(idx));
+}
+
+/*
+ * Set counter idx to val. L2X0 counters can only be written while disabled
+ * and perf core does not disable them in IRQ context, so do that here.
+ */
+static void l2x0pmu_write_counter(int idx, u32 val)
+{
+	u32 saved_cfg = l2x0pmu_disable_counter(idx);
+
+	writel_relaxed(val, COUNTER_ADDR(idx));
+	l2x0pmu_write_cfg(saved_cfg, idx);
+}
+
+static int counter_is_saturated(int idx)
+{
+	return l2x0pmu_read_counter(idx) == 0xFFFFFFFF;
+}
+
+/*
+ * Start the PMU: enable the event counter interrupt and then set the
+ * enable bit in the event counter control register, under pmu_lock.
+ */
+static void l2x0pmu_start(void)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0_enable_counter_interrupt();
+	l2x0pmu_write_ctrl(l2x0pmu_read_ctrl() | L2X0_EVENT_CNT_ENABLE);
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/*
+ * Stop the PMU: clear the enable bit in the event counter control
+ * register and then disable the event counter interrupt, under pmu_lock.
+ */
+static void l2x0pmu_stop(void)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0pmu_write_ctrl(l2x0pmu_read_ctrl() & ~L2X0_EVENT_CNT_ENABLE_MASK);
+	l2x0_disable_counter_interrupt();
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/* Program counter idx with the event held in event->config_base. */
+static void l2x0pmu_enable(struct hw_perf_event *event, int idx, int cpu)
+{
+	unsigned long irqflags;
+	u32 cfg = (event->config_base << L2X0_EVENT_CNT_CFG_SHIFT) &
+		  L2X0_EVENT_CNT_CFG_MASK;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0pmu_enable_counter(cfg, idx);
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+static void l2x0pmu_disable(struct hw_perf_event *event, int idx)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, flags);
+	l2x0pmu_disable_counter(idx);
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, flags);
+}
+
+/* Claim a free counter in events->used_mask; returns its index or -EAGAIN. */
+static int l2x0pmu_get_event_idx(struct pmu_hw_events *events,
+					struct hw_perf_event *hwc)
+{
+	int idx;
+
+	/* Counters are identical. Just grab a free one from the caller's mask. */
+	for (idx = 0; idx < L2X0_NUM_COUNTERS; ++idx)
+		if (!test_and_set_bit(idx, events->used_mask))
+			return idx;
+
+	return -EAGAIN;
+}
+
+/*
+ * As System PMUs are affine to CPU0, the fact that interrupts are disabled
+ * during interrupt handling is enough to serialise our actions and make this
+ * safe. We do not need to grab our pmu_lock here.
+ */
+static irqreturn_t l2x0pmu_handle_irq(int irq, void *dev)
+{
+	irqreturn_t status = IRQ_NONE;
+	struct perf_sample_data data;
+	struct pt_regs *regs = get_irq_regs();
+	int idx;
+
+	/* Initialise the sample; only its period is filled in per counter. */
+	perf_sample_data_init(&data, 0);
+
+	for (idx = 0; idx < L2X0_NUM_COUNTERS; ++idx) {
+		struct perf_event *event = hw_events.events[idx];
+		struct hw_perf_event *hwc;
+
+		/* A counter with no event may still hold a saturated value. */
+		if (!event || !counter_is_saturated(idx))
+			continue;
+
+		status = IRQ_HANDLED;
+		hwc = &event->hw;
+
+		/*
+		 * The armpmu_* functions expect counters to overflow, but
+		 * L220/PL310 counters saturate instead. Fake the overflow
+		 * here so the hardware is in sync with what the framework
+		 * expects.
+		 */
+		l2x0pmu_write_counter(idx, 0);
+
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			l2x0pmu_disable_counter(idx);
+	}
+
+	l2x0_clear_interrupts(L2X0_INTR_MASK_ECNTR);
+	irq_work_run();
+
+	return status;
+}
+
+static int map_l2x0_raw_event(u64 config)
+{
+	return (config <= l2x0pmu_max_event_id) ? config : -ENOENT;
+}
+
+/* Validate event for this PMU and map it to a raw L2x0 event number. */
+static int l2x0pmu_map_event(struct perf_event *event)
+{
+	const u64 sample_ok = PERF_SAMPLE_TIME | PERF_SAMPLE_ID |
+			      PERF_SAMPLE_PERIOD | PERF_SAMPLE_STREAM_ID |
+			      PERF_SAMPLE_RAW;
+
+	/* Events created for another PMU type are not ours to map. */
+	if (event->attr.type != l2x0_pmu.pmu.type)
+		return -ENOENT;
+
+	/*
+	 * The L2x0 counters are shared by all CPUs. Were perf allowed to
+	 * monitor from several CPUs, each one would report the same shared
+	 * total, and summing them would give the real value multiplied by
+	 * the number of CPUs. Only accept events bound to CPU 0 so that
+	 * this cannot happen.
+	 */
+	if (event->cpu != 0)
+		return -ENOENT;
+
+	/* Reject any requested sample type outside the supported set. */
+	if (event->attr.sample_type & ~sample_ok)
+		return -ENOENT;
+
+	return map_l2x0_raw_event(event->attr.config);
+}
+
+static struct arm_pmu l2x0_pmu = {
+	.id		= ARM_PERF_PMU_ID_L2X0,
+	.type		= ARM_PMU_DEVICE_L2CC,
+	.name		= "ARM L220/PL310 L2 Cache controller",
+	.start		= l2x0pmu_start,
+	.stop		= l2x0pmu_stop,
+	.handle_irq	= l2x0pmu_handle_irq,
+	.enable		= l2x0pmu_enable,
+	.disable	= l2x0pmu_disable,
+	.get_event_idx	= l2x0pmu_get_event_idx,
+	.read_counter	= l2x0pmu_read_counter,
+	.write_counter	= l2x0pmu_write_counter,
+	.map_event	= l2x0pmu_map_event,
+	.num_events	= L2X0_NUM_COUNTERS,
+	.max_period	= 0xFFFFFFFF,	/* value the IRQ handler treats as saturated */
+	.get_hw_events	= l2x0pmu_get_hw_events,
+};
+
+/* Record the platform device and register the PMU, passing on any error. */
+static int __devinit l2x0pmu_device_probe(struct platform_device *pdev)
+{
+	l2x0_pmu.plat_device = pdev;
+
+	return armpmu_register(&l2x0_pmu, "l2x0", -1);
+}
+
+static struct platform_driver l2x0pmu_driver = {
+	.driver		= {
+		.name	= "l2x0-pmu",
+	},
+	.probe		= l2x0pmu_device_probe,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&l2x0pmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+#endif /* CONFIG_HW_PERF_EVENTS */