ARM: perf: add L2x0 PMU driver
This patch adds initial support for the PMU featured in the L2x0
series of Level 2 Cache Controllers, supporting L220 & PL310.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Change-Id: I04d54f5b2586264ac3325a0ed19c452d25f8fd4c
[ashwinc@codeaurora.org: Remove unused functions for passing compilation.]
Signed-off-by: Ashwin Chaugule <ashwinc@codeaurora.org>
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 926ac0e..5dc9a66 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -69,6 +69,7 @@
#define L2X0_CACHE_ID_REV_MASK (0x3f)
#define L2X0_CACHE_ID_PART_MASK (0xf << 6)
#define L2X0_CACHE_ID_PART_L210 (1 << 6)
+#define L2X0_CACHE_ID_PART_L220 (2 << 6) /* L220 value of the field selected by L2X0_CACHE_ID_PART_MASK */
#define L2X0_CACHE_ID_PART_L310 (3 << 6)
#define L2X0_CACHE_ID_RTL_MASK 0x3f
#define L2X0_CACHE_ID_RTL_R0P0 0x0
@@ -104,6 +105,7 @@
#define L2X0_LATENCY_CTRL_WR_SHIFT 8
#define L2X0_ADDR_FILTER_EN 1
+/* Event counter interrupt bit; the PMU code uses it with L2X0_INTR_MASK and L2X0_INTR_CLEAR. */
+#define L2X0_INTR_MASK_ECNTR 1
#define REV_PL310_R2P0 4
@@ -146,6 +148,49 @@
extern struct l2x0_regs l2x0_saved_regs;
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * L220/PL310 event counter control register values.
+ *
+ * L2X0_EVENT_CNT_ENABLE is set in, and L2X0_EVENT_CNT_ENABLE_MASK cleared
+ * from, the control register to start and stop counting.
+ * L2X0_EVENT_CNT_RESET(x) yields bit (x + 1); presumably the reset bit for
+ * counter x - confirm against the TRM.
+ */
+#define L2X0_EVENT_CNT_ENABLE_MASK	1
+#define L2X0_EVENT_CNT_ENABLE		1
+/* The argument is parenthesised so expression arguments expand as intended. */
+#define L2X0_EVENT_CNT_RESET(x)		(1 << ((x) + 1))
+
+/*
+ * Event source selectors for an event counter configuration register.
+ *
+ * These are the raw, unshifted selector values: users shift them left by
+ * L2X0_EVENT_CNT_CFG_SHIFT and mask with L2X0_EVENT_CNT_CFG_MASK before
+ * writing them to the hardware.
+ *
+ * NOTE(review): the mnemonics presumably follow the event names used in the
+ * L220/PL310 technical reference manuals - confirm there.
+ */
+enum l2x0_perf_types {
+ L2X0_EVENT_CNT_CFG_DISABLED = 0x0,
+ L2X0_EVENT_CNT_CFG_CO = 0x1,
+ L2X0_EVENT_CNT_CFG_DRHIT = 0x2,
+ L2X0_EVENT_CNT_CFG_DRREQ = 0x3,
+ L2X0_EVENT_CNT_CFG_DWHIT = 0x4,
+ L2X0_EVENT_CNT_CFG_DWREQ = 0x5,
+ L2X0_EVENT_CNT_CFG_DWTREQ = 0x6,
+ L2X0_EVENT_CNT_CFG_IRHIT = 0x7,
+ L2X0_EVENT_CNT_CFG_IRREQ = 0x8,
+ L2X0_EVENT_CNT_CFG_WA = 0x9,
+
+ /* PL310 only */
+ L2X0_EVENT_CNT_CFG_IPFALLOC = 0xA,
+ L2X0_EVENT_CNT_CFG_EPFHIT = 0xB,
+ L2X0_EVENT_CNT_CFG_EPFALLOC = 0xC,
+ L2X0_EVENT_CNT_CFG_SRRCVD = 0xD,
+ L2X0_EVENT_CNT_CFG_SRCONF = 0xE,
+ L2X0_EVENT_CNT_CFG_EPFRCVD = 0xF,
+};
+
+/* Highest event selector available on each supported controller. */
+#define L220_EVENT_CNT_CFG_MAX			L2X0_EVENT_CNT_CFG_WA
+#define PL310_EVENT_CNT_CFG_MAX			L2X0_EVENT_CNT_CFG_EPFRCVD
+
+/* Position and extent of the 4-bit event selector in a config register. */
+#define L2X0_EVENT_CNT_CFG_SHIFT		2
+#define L2X0_EVENT_CNT_CFG_MASK			(0xF << L2X0_EVENT_CNT_CFG_SHIFT)
+
+/* Interrupt generation mode, held in the two bits below the selector. */
+#define L2X0_EVENT_CNT_CFG_INTR_MASK		0x3
+#define L2X0_EVENT_CNT_CFG_INTR_DISABLED	0x0
+#define L2X0_EVENT_CNT_CFG_INTR_INCREMENT	0x1
+#define L2X0_EVENT_CNT_CFG_INTR_OVERFLOW	0x2
+
+/* Number of event counters handled by the PMU code. */
+#define L2X0_NUM_COUNTERS			2
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index a40f81e..4f41fd6 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -22,6 +22,7 @@
ARM_PERF_PMU_ID_CA9,
ARM_PERF_PMU_ID_CA5,
ARM_PERF_PMU_ID_CA15,
+ ARM_PERF_PMU_ID_L2X0,
ARM_PERF_PMU_ID_CA7,
ARM_PERF_PMU_ID_SCORPION,
ARM_PERF_PMU_ID_SCORPIONMP,
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index bb4da0f..adab76d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -23,9 +23,12 @@
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/platform_device.h>
#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
#define CACHE_LINE_SIZE 32
@@ -653,3 +656,315 @@
pl310_resume();
dmb();
}
+
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * L220/PL310 PMU-specific functionality.
+ * TODO: Put this in a separate file and get the l2x0 driver to register
+ * the PMU from l2x0_{of}_init.
+ */
+
+/*
+ * Forward declaration: l2x0pmu_map_event() reads l2x0_pmu.pmu.type before
+ * the initialised definition further down.
+ */
+static struct arm_pmu l2x0_pmu;
+
+/*
+ * Largest raw event number accepted by map_l2x0_raw_event().
+ * NOTE(review): nothing in this section assigns it, so it keeps its zero
+ * initialiser here and only config 0 would be accepted - confirm that it is
+ * set elsewhere (e.g. from the detected part number).
+ */
+static u64 l2x0pmu_max_event_id;
+
+/*
+ * Bookkeeping returned by l2x0pmu_get_hw_events(): one event slot and one
+ * allocation bit per hardware counter, plus the lock taken around the
+ * register updates in this file.
+ */
+static struct perf_event *events[L2X0_NUM_COUNTERS];
+static unsigned long used_mask[BITS_TO_LONGS(L2X0_NUM_COUNTERS)];
+static struct pmu_hw_events hw_events = {
+	.events		= events,
+	.used_mask	= used_mask,
+	/* The argument only names the lock; keep it in step with the variable. */
+	.pmu_lock	= __RAW_SPIN_LOCK_UNLOCKED(hw_events.pmu_lock),
+};
+
+/*
+ * Register address helpers, all relative to l2x0_base.
+ *
+ * The per-counter registers are addressed downwards from counter 0: counter
+ * idx lives 4 * idx bytes below the counter 0 register. idx is parenthesised
+ * so that expression arguments (e.g. "i + 1") expand correctly.
+ */
+#define COUNTER_CFG_ADDR(idx)	(l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * (idx))
+
+#define COUNTER_CTRL_ADDR	(l2x0_base + L2X0_EVENT_CNT_CTRL)
+
+#define COUNTER_ADDR(idx)	(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * (idx))
+
+/* Return the current value of the L2x0 interrupt mask register. */
+static u32 l2x0_read_intr_mask(void)
+{
+	u32 mask = readl_relaxed(l2x0_base + L2X0_INTR_MASK);
+
+	return mask;
+}
+
+/* Write @val to the L2x0 interrupt mask register. */
+static void l2x0_write_intr_mask(u32 val)
+{
+ writel_relaxed(val, l2x0_base + L2X0_INTR_MASK);
+}
+
+/*
+ * Set the event counter bit in the interrupt mask register, leaving all
+ * other mask bits as they were (read-modify-write).
+ */
+static void l2x0_enable_counter_interrupt(void)
+{
+	l2x0_write_intr_mask(l2x0_read_intr_mask() | L2X0_INTR_MASK_ECNTR);
+}
+
+/*
+ * Clear the event counter bit in the interrupt mask register, leaving all
+ * other mask bits as they were (read-modify-write).
+ */
+static void l2x0_disable_counter_interrupt(void)
+{
+	l2x0_write_intr_mask(l2x0_read_intr_mask() & ~L2X0_INTR_MASK_ECNTR);
+}
+
+/* Write @flags to the L2x0 interrupt clear register. */
+static void l2x0_clear_interrupts(u32 flags)
+{
+ writel_relaxed(flags, l2x0_base + L2X0_INTR_CLEAR);
+}
+
+/*
+ * arm_pmu .get_hw_events callback: always returns the single file-scope
+ * hw_events instance.
+ */
+static struct pmu_hw_events *l2x0pmu_get_hw_events(void)
+{
+ return &hw_events;
+}
+
+/* Return the current value of the event counter control register. */
+static u32 l2x0pmu_read_ctrl(void)
+{
+	u32 ctrl = readl_relaxed(COUNTER_CTRL_ADDR);
+
+	return ctrl;
+}
+
+/* Write @val to the event counter control register. */
+static void l2x0pmu_write_ctrl(u32 val)
+{
+ writel_relaxed(val, COUNTER_CTRL_ADDR);
+}
+
+/* Return the configuration register value of event counter @idx. */
+static u32 l2x0pmu_read_cfg(int idx)
+{
+	u32 cfg = readl_relaxed(COUNTER_CFG_ADDR(idx));
+
+	return cfg;
+}
+
+/* Write @val to the configuration register of event counter @idx. */
+static void l2x0pmu_write_cfg(u32 val, int idx)
+{
+ writel_relaxed(val, COUNTER_CFG_ADDR(idx));
+}
+
+/*
+ * Program counter @idx with @cfg, additionally requesting the overflow
+ * interrupt mode in the same write.
+ */
+static void l2x0pmu_enable_counter(u32 cfg, int idx)
+{
+	l2x0pmu_write_cfg(cfg | L2X0_EVENT_CNT_CFG_INTR_OVERFLOW, idx);
+}
+
+/*
+ * Disable counter @idx by clearing both its event selector and its interrupt
+ * mode bits.
+ *
+ * Returns the configuration that was in place before the call, so the caller
+ * can restore it later.
+ */
+static u32 l2x0pmu_disable_counter(int idx)
+{
+	u32 saved = l2x0pmu_read_cfg(idx);
+	u32 cleared = saved & ~(L2X0_EVENT_CNT_CFG_MASK |
+				L2X0_EVENT_CNT_CFG_INTR_MASK);
+
+	l2x0pmu_write_cfg(cleared, idx);
+
+	return saved;
+}
+
+/* arm_pmu .read_counter callback: return the value of event counter @idx. */
+static u32 l2x0pmu_read_counter(int idx)
+{
+	u32 count = readl_relaxed(COUNTER_ADDR(idx));
+
+	return count;
+}
+
+/*
+ * arm_pmu .write_counter callback: set event counter @idx to @val.
+ *
+ * L2X0 counters can only be written to when they are disabled. As perf core
+ * does not disable counters before writing to them under interrupts, the
+ * counter is disabled here for the duration of the write and its previous
+ * configuration is put back afterwards.
+ */
+static void l2x0pmu_write_counter(int idx, u32 val)
+{
+	u32 saved_cfg;
+
+	saved_cfg = l2x0pmu_disable_counter(idx);
+	writel_relaxed(val, COUNTER_ADDR(idx));
+	l2x0pmu_write_cfg(saved_cfg, idx);
+}
+
+/* Return non-zero when counter @idx reads as all ones, zero otherwise. */
+static int counter_is_saturated(int idx)
+{
+	u32 count = l2x0pmu_read_counter(idx);
+
+	return count == 0xFFFFFFFF;
+}
+
+/*
+ * arm_pmu .start callback: unmask the counter interrupt, then set the enable
+ * bit in the event counter control register. Both steps run under pmu_lock
+ * with interrupts disabled.
+ */
+static void l2x0pmu_start(void)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0_enable_counter_interrupt();
+	l2x0pmu_write_ctrl(l2x0pmu_read_ctrl() | L2X0_EVENT_CNT_ENABLE);
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/*
+ * arm_pmu .stop callback: clear the enable bit in the event counter control
+ * register, then mask the counter interrupt (the reverse order of
+ * l2x0pmu_start()). Both steps run under pmu_lock with interrupts disabled.
+ */
+static void l2x0pmu_stop(void)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0pmu_write_ctrl(l2x0pmu_read_ctrl() & ~L2X0_EVENT_CNT_ENABLE_MASK);
+	l2x0_disable_counter_interrupt();
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/*
+ * arm_pmu .enable callback: program counter @idx to count the event selected
+ * by @event->config_base.
+ *
+ * The selector is moved into the event field of the configuration register
+ * and anything outside that field is discarded. @cpu is not used.
+ */
+static void l2x0pmu_enable(struct hw_perf_event *event, int idx, int cpu)
+{
+	unsigned long irqflags;
+	u32 cfg = (event->config_base << L2X0_EVENT_CNT_CFG_SHIFT) &
+		  L2X0_EVENT_CNT_CFG_MASK;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+	l2x0pmu_enable_counter(cfg, idx);
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/*
+ * arm_pmu .disable callback: disable counter @idx under pmu_lock.
+ * @event is not used; the previous configuration returned by
+ * l2x0pmu_disable_counter() is discarded.
+ */
+static void l2x0pmu_disable(struct hw_perf_event *event, int idx)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&hw_events.pmu_lock, irqflags);
+
+	l2x0pmu_disable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&hw_events.pmu_lock, irqflags);
+}
+
+/*
+ * arm_pmu .get_event_idx callback: claim a free counter.
+ *
+ * @events: bookkeeping whose used_mask records which counters are taken
+ * @hwc:    the event being scheduled (unused, all counters are identical)
+ *
+ * The allocation is made in the caller-supplied @events rather than in the
+ * file-scope hw_events, so a caller that passes scratch bookkeeping (as the
+ * ARM perf core is understood to do when validating event groups - confirm
+ * against the core in this tree) does not consume real counters.
+ *
+ * Returns the claimed counter index, or -EAGAIN if every counter is in use.
+ */
+static int l2x0pmu_get_event_idx(struct pmu_hw_events *events,
+				  struct hw_perf_event *hwc)
+{
+	int idx;
+
+	/* Counters are identical. Just grab a free one. */
+	for (idx = 0; idx < L2X0_NUM_COUNTERS; ++idx) {
+		if (!test_and_set_bit(idx, events->used_mask))
+			return idx;
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Interrupt handler for the L2x0 event counters.
+ *
+ * As System PMUs are affine to CPU0, the fact that interrupts are disabled
+ * during interrupt handling is enough to serialise our actions and make this
+ * safe. We do not need to grab our pmu_lock here.
+ *
+ * Returns IRQ_HANDLED if at least one counter that has an event attached was
+ * found saturated, IRQ_NONE otherwise. The event counter interrupt is
+ * cleared in either case.
+ */
+static irqreturn_t l2x0pmu_handle_irq(int irq, void *dev)
+{
+	irqreturn_t status = IRQ_NONE;
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	int idx;
+
+	regs = get_irq_regs();
+
+	/*
+	 * l2x0pmu_map_event() admits PERF_SAMPLE_RAW, so the sample must not
+	 * reach perf_event_overflow() with data.raw left as stack garbage.
+	 */
+	perf_sample_data_init(&data, 0);
+
+	for (idx = 0; idx < L2X0_NUM_COUNTERS; ++idx) {
+		struct perf_event *event = hw_events.events[idx];
+		struct hw_perf_event *hwc;
+
+		/* No event owns this counter, so there is nothing to update. */
+		if (!event)
+			continue;
+
+		if (!counter_is_saturated(idx))
+			continue;
+
+		status = IRQ_HANDLED;
+
+		hwc = &event->hw;
+
+		/*
+		 * The armpmu_* functions expect counters to overflow, but
+		 * L220/PL310 counters saturate instead. Fake the overflow
+		 * here so the hardware is in sync with what the framework
+		 * expects.
+		 */
+		l2x0pmu_write_counter(idx, 0);
+
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		/* A non-zero return asks us to stop this counter. */
+		if (perf_event_overflow(event, &data, regs))
+			l2x0pmu_disable_counter(idx);
+	}
+
+	l2x0_clear_interrupts(L2X0_INTR_MASK_ECNTR);
+
+	irq_work_run();
+
+	return status;
+}
+
+/*
+ * Validate a raw event number.
+ *
+ * Returns @config unchanged when it does not exceed l2x0pmu_max_event_id,
+ * -ENOENT otherwise.
+ */
+static int map_l2x0_raw_event(u64 config)
+{
+	if (config > l2x0pmu_max_event_id)
+		return -ENOENT;
+
+	return config;
+}
+
+/*
+ * arm_pmu .map_event callback: decide whether @event can be counted here.
+ *
+ * Returns the hardware event number on success. Returns -ENOENT when the
+ * event belongs to another PMU type, targets a CPU other than 0, requests a
+ * sample type outside the supported set, or names an out-of-range event.
+ */
+static int l2x0pmu_map_event(struct perf_event *event)
+{
+	const u64 supported_samples = PERF_SAMPLE_TIME |
+				      PERF_SAMPLE_ID |
+				      PERF_SAMPLE_PERIOD |
+				      PERF_SAMPLE_STREAM_ID |
+				      PERF_SAMPLE_RAW;
+
+	if (event->attr.type != l2x0_pmu.pmu.type)
+		return -ENOENT;
+
+	/*
+	 * L2x0 counters are global across CPUs.
+	 * If userspace asks perf to monitor from multiple CPUs, each CPU will
+	 * report the shared total. When summed, this will be the actual value
+	 * multiplied by the number of CPUs. We limit monitoring to a single
+	 * CPU (0) to prevent confusion stemming from this.
+	 */
+	if (event->cpu != 0)
+		return -ENOENT;
+
+	/* Refuse any requested sample type outside the supported set. */
+	if (event->attr.sample_type & ~supported_samples)
+		return -ENOENT;
+
+	return map_l2x0_raw_event(event->attr.config);
+}
+
+/*
+ * PMU description registered from l2x0pmu_device_probe(): identification,
+ * counter geometry, and the callbacks implemented above.
+ */
+static struct arm_pmu l2x0_pmu = {
+	/* Identification */
+	.id		= ARM_PERF_PMU_ID_L2X0,
+	.type		= ARM_PMU_DEVICE_L2CC,
+	.name		= "ARM L220/PL310 L2 Cache controller",
+
+	/* Counter geometry */
+	.num_events	= L2X0_NUM_COUNTERS,
+	.max_period	= 0xFFFFFFFF,
+
+	/* Whole-PMU control */
+	.start		= l2x0pmu_start,
+	.stop		= l2x0pmu_stop,
+	.handle_irq	= l2x0pmu_handle_irq,
+
+	/* Per-counter control */
+	.enable		= l2x0pmu_enable,
+	.disable	= l2x0pmu_disable,
+	.read_counter	= l2x0pmu_read_counter,
+	.write_counter	= l2x0pmu_write_counter,
+
+	/* Event mapping and counter allocation */
+	.map_event	= l2x0pmu_map_event,
+	.get_event_idx	= l2x0pmu_get_event_idx,
+	.get_hw_events	= l2x0pmu_get_hw_events,
+};
+
+/*
+ * Platform driver probe: record the platform device in l2x0_pmu and register
+ * the PMU under the name "l2x0".
+ *
+ * Returns the result of armpmu_register(), so that a failed registration is
+ * reported as a failed probe instead of being silently ignored.
+ */
+static int __devinit l2x0pmu_device_probe(struct platform_device *pdev)
+{
+	l2x0_pmu.plat_device = pdev;
+
+	return armpmu_register(&l2x0_pmu, "l2x0", -1);
+}
+
+/* Platform driver named "l2x0-pmu"; its probe callback is l2x0pmu_device_probe(). */
+static struct platform_driver l2x0pmu_driver = {
+ .driver = {
+ .name = "l2x0-pmu",
+ },
+ .probe = l2x0pmu_device_probe,
+};
+
+/*
+ * Initcall that registers l2x0pmu_driver with the platform bus.
+ * Returns whatever platform_driver_register() returns.
+ */
+static int __init register_pmu_driver(void)
+{
+	int ret = platform_driver_register(&l2x0pmu_driver);
+
+	return ret;
+}
+device_initcall(register_pmu_driver);
+
+#endif /* CONFIG_HW_PERF_EVENTS */