msm: perf: Add L2 support for tracecounters
Add L2 counter output to the tracecounters implementation.
This enables collection of L2 PMU counter values on context switch.
Set up per-core filtering based on the number of L2 counters and the
number of cores. Read the L2 counter values and emit them in the trace
alongside the L1 counter values.
Change-Id: I8bbbd251ee52c66505d22ccf037c601013a916f0
Signed-off-by: Sheetal Sahasrabudhe <sheetals@codeaurora.org>
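
As a point of reference, a minimal sketch (not part of the patch; the
helper name is illustrative) of the counter-to-core assignment used
below: with num_cores CPUs and num_l2ctrs L2 counters, CPU n owns
counters n, n + num_cores, n + 2 * num_cores, and so on, and the
tracepoint reads at most the first two of these per CPU.

#include <linux/printk.h>
#include <linux/types.h>

/* Sketch: list the L2 counters a given CPU owns under this scheme. */
static void show_l2_ctrs_for_cpu(u32 cpu, u32 num_cores, u32 num_l2ctrs)
{
	u32 idx;

	for (idx = cpu; idx < num_l2ctrs; idx += num_cores)
		pr_info("cpu%u owns L2 counter %u\n", cpu, idx);
}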
diff --git a/arch/arm/mach-msm/include/mach/msm-krait-l2-accessors.h b/arch/arm/mach-msm/include/mach/msm-krait-l2-accessors.h
index a35ff4d..45d000b 100644
--- a/arch/arm/mach-msm/include/mach/msm-krait-l2-accessors.h
+++ b/arch/arm/mach-msm/include/mach/msm-krait-l2-accessors.h
@@ -2,7 +2,7 @@
#define __ASM_ARCH_MSM_MSM_KRAIT_L2_ACCESSORS_H
/*
- * Copyright (c) 2011,2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -14,6 +14,55 @@
* GNU General Public License for more details.
*/
+#define MAX_L2_PERIOD ((1ULL << 32) - 1)
+#define MAX_KRAIT_L2_CTRS 10
+
+#define PMCR_NUM_EV_SHIFT 11
+#define PMCR_NUM_EV_MASK 0x1f
+
+#define L2_EVT_MASK 0xfffff
+
+#define L2_SLAVE_EV_PREFIX 4
+#define L2_TRACECTR_PREFIX 5
+
+#define L2PMCCNTR 0x409
+#define L2PMCCNTCR 0x408
+#define L2PMCCNTSR 0x40A
+#define L2CYCLE_CTR_BIT 31
+#define L2CYCLE_CTR_RAW_CODE 0xfe
+
+#define L2PMOVSR 0x406
+
+#define L2PMCR 0x400
+#define L2PMCR_RESET_ALL 0x6
+#define L2PMCR_GLOBAL_ENABLE 0x1
+#define L2PMCR_GLOBAL_DISABLE 0x0
+
+#define L2PMCNTENSET 0x403
+#define L2PMCNTENCLR 0x402
+
+#define L2PMINTENSET 0x405
+#define L2PMINTENCLR 0x404
+
+#define IA_L2PMXEVCNTCR_BASE 0x420
+#define IA_L2PMXEVTYPER_BASE 0x424
+#define IA_L2PMRESX_BASE 0x410
+#define IA_L2PMXEVFILTER_BASE 0x423
+#define IA_L2PMXEVCNTR_BASE 0x421
+
+/* Event format is -e rsRCCG; see get_event_desc() */
+
+#define EVENT_PREFIX_MASK 0xf0000
+#define EVENT_REG_MASK 0x0f000
+#define EVENT_GROUPSEL_MASK 0x0000f
+#define EVENT_GROUPCODE_MASK 0x00ff0
+
+#define EVENT_PREFIX_SHIFT 16
+#define EVENT_REG_SHIFT 12
+#define EVENT_GROUPCODE_SHIFT 4
+
+#define RESRX_VALUE_EN 0x80000000
+
#ifdef CONFIG_ARCH_MSM_KRAIT
extern void set_l2_indirect_reg(u32 reg_addr, u32 val);
extern u32 get_l2_indirect_reg(u32 reg_addr);
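
As a hedged illustration of the -e rsRCCG encoding documented above
(the helper is hypothetical, not part of the patch), an event code
splits into its fields with the masks and shifts just added:

#include <linux/printk.h>
#include <linux/types.h>

/* Sketch: decode an rsRCCG event code into prefix/reg/code/group. */
static void decode_l2_event(u32 event)
{
	u32 prefix = (event & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
	u32 reg = (event & EVENT_REG_MASK) >> EVENT_REG_SHIFT;
	u32 code = (event & EVENT_GROUPCODE_MASK) >> EVENT_GROUPCODE_SHIFT;
	u32 group = event & EVENT_GROUPSEL_MASK;

	/* Prefix 4 (L2_SLAVE_EV_PREFIX) marks slave events; prefix 5
	 * (L2_TRACECTR_PREFIX) marks tracecounter events.
	 */
	pr_info("prefix=%u reg=%u code=%u group=%u\n",
		prefix, reg, code, group);
}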
diff --git a/arch/arm/mach-msm/perf_debug.c b/arch/arm/mach-msm/perf_debug.c
index 28d8e42..7e1206a 100644
--- a/arch/arm/mach-msm/perf_debug.c
+++ b/arch/arm/mach-msm/perf_debug.c
@@ -35,6 +35,7 @@
"10 Perf: Fix counts across power collapse\n"
"11 ARM: dts: msm: add perf-events support for msm8x10, msm8x12\n"
"12 Perf: Make per-process counters configurable\n"
+ "13 msm: perf: Add L2 support for tracecounters\n"
;
static ssize_t desc_read(struct file *fp, char __user *buf,
diff --git a/arch/arm/mach-msm/perf_event_msm_krait_l2.c b/arch/arm/mach-msm/perf_event_msm_krait_l2.c
index ad34457..1e11314 100644
--- a/arch/arm/mach-msm/perf_event_msm_krait_l2.c
+++ b/arch/arm/mach-msm/perf_event_msm_krait_l2.c
@@ -18,54 +18,6 @@
#include <mach/msm-krait-l2-accessors.h>
-#define MAX_L2_PERIOD ((1ULL << 32) - 1)
-#define MAX_KRAIT_L2_CTRS 10
-
-#define PMCR_NUM_EV_SHIFT 11
-#define PMCR_NUM_EV_MASK 0x1f
-
-#define L2_EVT_MASK 0xfffff
-
-#define L2_SLAVE_EV_PREFIX 4
-
-#define L2PMCCNTR 0x409
-#define L2PMCCNTCR 0x408
-#define L2PMCCNTSR 0x40A
-#define L2CYCLE_CTR_BIT 31
-#define L2CYCLE_CTR_RAW_CODE 0xfe
-
-#define L2PMOVSR 0x406
-
-#define L2PMCR 0x400
-#define L2PMCR_RESET_ALL 0x6
-#define L2PMCR_GLOBAL_ENABLE 0x1
-#define L2PMCR_GLOBAL_DISABLE 0x0
-
-#define L2PMCNTENSET 0x403
-#define L2PMCNTENCLR 0x402
-
-#define L2PMINTENSET 0x405
-#define L2PMINTENCLR 0x404
-
-#define IA_L2PMXEVCNTCR_BASE 0x420
-#define IA_L2PMXEVTYPER_BASE 0x424
-#define IA_L2PMRESX_BASE 0x410
-#define IA_L2PMXEVFILTER_BASE 0x423
-#define IA_L2PMXEVCNTR_BASE 0x421
-
-/* event format is -e rsRCCG See get_event_desc() */
-
-#define EVENT_PREFIX_MASK 0xf0000
-#define EVENT_REG_MASK 0x0f000
-#define EVENT_GROUPSEL_MASK 0x0000f
-#define EVENT_GROUPCODE_MASK 0x00ff0
-
-#define EVENT_PREFIX_SHIFT 16
-#define EVENT_REG_SHIFT 12
-#define EVENT_GROUPCODE_SHIFT 4
-
-#define RESRX_VALUE_EN 0x80000000
-
/*
* The L2 PMU is shared between all CPU's, so protect
* its bitmap access.
@@ -197,13 +149,16 @@
set_l2_indirect_reg(filter_reg, filter_val);
}
-static void set_evfilter_sys_mode(int ctr, unsigned int is_slv)
+static void set_evfilter_sys_mode(int ctr, unsigned int is_slv, int cpu,
+ unsigned int is_tracectr)
{
u32 filter_reg = (ctr * 16) + IA_L2PMXEVFILTER_BASE;
u32 filter_val = l2_orig_filter_prefix | 0xf;
- if (is_slv)
+ if (is_slv == 1)
filter_val = l2_slv_filter_prefix;
+ if (is_tracectr == 1)
+ filter_val = l2_orig_filter_prefix | (1 << cpu);
set_l2_indirect_reg(filter_reg, filter_val);
}
@@ -277,6 +232,7 @@
struct event_desc evdesc;
unsigned long iflags;
unsigned int is_slv = 0;
+ unsigned int is_tracectr = 0;
unsigned int evt_prefix;
raw_spin_lock_irqsave(&krait_l2_pmu_hw_events.pmu_lock, iflags);
@@ -290,6 +246,8 @@
if (evt_prefix == L2_SLAVE_EV_PREFIX)
is_slv = 1;
+ else if (evt_prefix == L2_TRACECTR_PREFIX)
+ is_tracectr = 1;
set_evcntcr(idx);
@@ -305,7 +263,7 @@
if (cpu < 0)
set_evfilter_task_mode(idx, is_slv);
else
- set_evfilter_sys_mode(idx, is_slv);
+ set_evfilter_sys_mode(idx, is_slv, cpu, is_tracectr);
out:
enable_intenset(idx);
@@ -456,6 +414,7 @@
static int msm_l2_test_set_ev_constraint(struct perf_event *event)
{
u32 evt_type = event->attr.config & L2_EVT_MASK;
+ u8 evt_prefix = (evt_type & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
u8 reg = (evt_type & 0x0F000) >> 12;
u8 group = evt_type & 0x0000F;
u8 code = (evt_type & 0x00FF0) >> 4;
@@ -464,6 +423,8 @@
u64 bitmap_t;
u32 shift_idx;
+ if (evt_prefix == L2_TRACECTR_PREFIX)
+ return err;
/*
* Cycle counter collision is detected in
* get_event_idx().
@@ -507,12 +468,15 @@
static int msm_l2_clear_ev_constraint(struct perf_event *event)
{
u32 evt_type = event->attr.config & L2_EVT_MASK;
+ u8 evt_prefix = (evt_type & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
u8 reg = (evt_type & 0x0F000) >> 12;
u8 group = evt_type & 0x0000F;
unsigned long flags;
u64 bitmap_t;
u32 shift_idx;
+ if (evt_prefix == L2_TRACECTR_PREFIX)
+ return 1;
raw_spin_lock_irqsave(&l2_pmu_constraints.lock, flags);
shift_idx = ((reg * 4) + group);
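
To make the filter change above concrete, here is a hedged sketch of
the selection set_evfilter_sys_mode() now performs (the helper is
hypothetical; the prefix arguments stand in for l2_orig_filter_prefix
and l2_slv_filter_prefix defined earlier in that file). On a 4-core
part, a tracecounter event on cpu 2 gets low bits 0x4 rather than the
all-cores 0xf:

#include <linux/types.h>

/* Sketch: per-counter filter value after this patch. */
static u32 pick_filter_val(unsigned int is_slv, unsigned int is_tracectr,
			   int cpu, u32 orig_prefix, u32 slv_prefix)
{
	u32 filter_val = orig_prefix | 0xf;	/* default: count all cores */

	if (is_slv)
		filter_val = slv_prefix;	/* slave-port events */
	if (is_tracectr)
		filter_val = orig_prefix | (1 << cpu);	/* one core only */

	return filter_val;
}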
diff --git a/arch/arm/mach-msm/perf_trace_counters.h b/arch/arm/mach-msm/perf_trace_counters.h
index 8f77bad..f1753b8 100644
--- a/arch/arm/mach-msm/perf_trace_counters.h
+++ b/arch/arm/mach-msm/perf_trace_counters.h
@@ -19,13 +19,17 @@
/* Ctr index for PMCNTENSET/CLR */
#define CC 0x80000000
#define C0 0x1
-#define C1 0x10
-#define C2 0x100
-#define C3 0x1000
+#define C1 0x2
+#define C2 0x4
+#define C3 0x8
+#define C_ALL (CC | C0 | C1 | C2 | C3)
+#define RESET_ALL 6
#include <linux/sched.h>
+#include <linux/cpumask.h>
#include <linux/tracepoint.h>
+#include <mach/msm-krait-l2-accessors.h>
TRACE_EVENT(sched_switch_with_ctrs,
@@ -41,82 +45,110 @@
__field(u32, ctr1)
__field(u32, ctr2)
__field(u32, ctr3)
+ __field(u32, lctr0)
+ __field(u32, lctr1)
),
TP_fast_assign(
+ u32 cpu = smp_processor_id();
+ u32 idx;
+ u32 counter_reg;
+ u32 val;
+ u32 num_l2ctrs;
+ u32 num_cores = nr_cpu_ids;
__entry->old_pid = prev;
__entry->new_pid = next;
+ __entry->lctr0 = 0;
+ __entry->lctr1 = 0;
+
+ val = get_l2_indirect_reg(L2PMCR);
+ num_l2ctrs = ((val >> PMCR_NUM_EV_SHIFT) & PMCR_NUM_EV_MASK) + 1;
+ /* Disable all counters */
+ asm volatile("mcr p15, 0, %0, c9, c12, 2"
+ : : "r"(C_ALL));
/* cycle counter */
- /* Disable */
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r"(CC));
/* Read value */
asm volatile("mrc p15, 0, %0, c9, c13, 0"
: "=r"(__entry->cctr));
- /* Reset */
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r"(0));
- /* Enable */
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(CC));
/* ctr 0 */
- /* Disable */
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r"(C0));
/* Select */
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r"(0));
/* Read value */
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r"(__entry->ctr0));
- /* Reset */
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r"(0));
- /* Enable */
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(C0));
/* ctr 1 */
- /* Disable */
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r"(C1));
/* Select */
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r"(1));
/* Read value */
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r"(__entry->ctr1));
- /* Reset */
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r"(0));
- /* Enable */
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(C1));
/* ctr 2 */
- /* Disable */
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r"(C2));
/* Select */
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r"(2));
/* Read value */
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r"(__entry->ctr2));
- /* Reset */
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r"(0));
- /* Enable */
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(C2));
/* ctr 3 */
- /* Disable */
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r"(C3));
/* Select */
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r"(3));
/* Read value */
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r"(__entry->ctr3));
- /* Reset */
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r"(0));
+
+ /* Read PMCR */
+ asm volatile("mrc p15, 0, %0, c9, c12, 0"
+ : "=r"(val));
+ /* Reset all */
+ asm volatile("mcr p15, 0, %0, c9, c12, 0"
+ : : "r"(val | RESET_ALL));
+ /* Enable all counters */
+ asm volatile("mcr p15, 0, %0, c9, c12, 1"
+ : : "r"(C_ALL));
+
+ /* L2 counters */
+ /* Assign L2 counters to cores sequentially, starting from zero.
+ A core can own more than one L2 counter when there are more
+ L2 counters than cores. */
+
+ idx = cpu;
+ /* Disable */
+ set_l2_indirect_reg(L2PMCNTENCLR, 1 << idx);
+ /* The L2PMxEVCNTR registers sit at 0x421, 0x431, ...
+ (stride 16), so multiply idx by 16 to get the counter
+ register address. */
+ counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
+ val = get_l2_indirect_reg(counter_reg);
+ __entry->lctr0 = val;
+ set_l2_indirect_reg(counter_reg, 0);
/* Enable */
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(C3));
+ set_l2_indirect_reg(L2PMCNTENSET, 1 << idx);
+
+ idx = num_cores + cpu;
+ if (idx < num_l2ctrs) {
+ /* Disable */
+ set_l2_indirect_reg(L2PMCNTENCLR, 1 << idx);
+ counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
+ val = get_l2_indirect_reg(counter_reg);
+ __entry->lctr1 = val;
+ set_l2_indirect_reg(counter_reg, 0);
+ /* Enable */
+ set_l2_indirect_reg(L2PMCNTENSET, 1 << idx);
+ }
),
TP_printk("prev_pid=%d, next_pid=%d, CCNTR: %u, CTR0: %u," \
- " CTR1: %u, CTR2: %u, CTR3: %u",
+ " CTR1: %u, CTR2: %u, CTR3: %u," \
+ " L2CTR0,: %u, L2CTR1: %u",
__entry->old_pid, __entry->new_pid,
__entry->cctr, __entry->ctr0, __entry->ctr1,
- __entry->ctr2, __entry->ctr3)
+ __entry->ctr2, __entry->ctr3,
+ __entry->lctr0, __entry->lctr1)
);
#endif
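
For reference, the per-counter sequence the tracepoint performs can be
read as the following hedged sketch (the helper is hypothetical; the
accessors and register defines come from msm-krait-l2-accessors.h).
For idx = 0 the counter register is 0x421, for idx = 1 it is 0x431,
and so on, stride 16:

#include <linux/types.h>
#include <mach/msm-krait-l2-accessors.h>

/* Sketch: disable, read, zero, and re-enable one L2 counter. */
static u32 read_and_reset_l2_ctr(u32 idx)
{
	u32 counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
	u32 val;

	set_l2_indirect_reg(L2PMCNTENCLR, 1 << idx);	/* disable */
	val = get_l2_indirect_reg(counter_reg);		/* read current count */
	set_l2_indirect_reg(counter_reg, 0);		/* reset to zero */
	set_l2_indirect_reg(L2PMCNTENSET, 1 << idx);	/* re-enable */

	return val;
}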