/*
 * arch/sh/oprofile/op_model_sh7750.c
 *
 * OProfile support for SH7750/SH7750S Performance Counters
 *
 * Copyright (C) 2003, 2004 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
| 12 | #include <linux/kernel.h> |
| 13 | #include <linux/oprofile.h> |
| 14 | #include <linux/profile.h> |
| 15 | #include <linux/init.h> |
| 16 | #include <linux/errno.h> |
| 17 | #include <linux/interrupt.h> |
| 18 | #include <linux/fs.h> |
| 19 | #include <linux/notifier.h> |
| 20 | #include <asm/uaccess.h> |
| 21 | #include <asm/io.h> |
| 22 | |
/* Base addresses of the two performance-monitor register groups. */
#define PM_CR_BASE	0xff000084	/* 16-bit */
#define PM_CTR_BASE	0xff100004	/* 32-bit */

/* Control register for each counter. */
#define PMCR1		(PM_CR_BASE + 0x00)
#define PMCR2		(PM_CR_BASE + 0x04)

/* Counter value registers: a high and a low word for each counter. */
#define PMCTR1H		(PM_CTR_BASE + 0x00)
#define PMCTR1L		(PM_CTR_BASE + 0x04)
#define PMCTR2H		(PM_CTR_BASE + 0x08)
#define PMCTR2L		(PM_CTR_BASE + 0x0c)

/* PMCR field that holds the event code selected for the counter. */
#define PMCR_PMM_MASK	0x0000003f

/* PMCR control bits. */
#define PMCR_CLKF	0x00000100
#define PMCR_PMCLR	0x00002000	/* written to clear the counter */
#define PMCR_PMST	0x00004000
#define PMCR_PMEN	0x00008000	/* cleared again when profiling stops */

/* Bits set together in PMCR when a counter is started. */
#define PMCR_ENABLE	(PMCR_PMST | PMCR_PMEN)
| 41 | |
/*
 * Number of performance counters available on SH7750/SH7750S.
 */
#define NR_CNTRS	2

/* Provides the string that is stored as the OProfile cpu_type at init. */
extern const char *get_cpu_subtype(void);
| 48 | |
/*
 * Settings for one performance counter. Apart from "count", each field is
 * backed by a file of the same name in the counter's oprofilefs directory.
 */
struct op_counter_config {
	unsigned long enabled;	/* non-zero: program the counter on start */
	unsigned long event;	/* event code placed in the PMCR on start */
	unsigned long count;

	/* Placeholders that exist only to keep the userspace tools happy */
	unsigned long kernel;
	unsigned long user;
	unsigned long unit_mask;
};
| 59 | |
| 60 | static struct op_counter_config ctr[NR_CNTRS]; |
| 61 | |
/*
 * There are a number of events supported by each counter (33 in total).
 * Since we have 2 counters, each counter will take the event code as it
 * corresponds to the PMCR PMM setting. Each counter can be configured
 * independently.
 *
 *	Event Code	Description
 *	----------	-----------
 *
 *	0x01		Operand read access
 *	0x02		Operand write access
 *	0x03		UTLB miss
 *	0x04		Operand cache read miss
 *	0x05		Operand cache write miss
 *	0x06		Instruction fetch (w/ cache)
 *	0x07		Instruction TLB miss
 *	0x08		Instruction cache miss
 *	0x09		All operand accesses
 *	0x0a		All instruction accesses
 *	0x0b		OC RAM operand access
 *	0x0d		On-chip I/O space access
 *	0x0e		Operand access (r/w)
 *	0x0f		Operand cache miss (r/w)
 *	0x10		Branch instruction
 *	0x11		Branch taken
 *	0x12		BSR/BSRF/JSR
 *	0x13		Instruction execution
 *	0x14		Instruction execution in parallel
 *	0x15		FPU Instruction execution
 *	0x16		Interrupt
 *	0x17		NMI
 *	0x18		trapa instruction execution
 *	0x19		UBCA match
 *	0x1a		UBCB match
 *	0x21		Instruction cache fill
 *	0x22		Operand cache fill
 *	0x23		Elapsed time
 *	0x24		Pipeline freeze by I-cache miss
 *	0x25		Pipeline freeze by D-cache miss
 *	0x27		Pipeline freeze by branch instruction
 *	0x28		Pipeline freeze by CPU register
 *	0x29		Pipeline freeze by FPU
 *
 * Unfortunately we don't have a native exception or interrupt for counter
 * overflow (although since these counters can run for 16.3 days without
 * overflowing, it's not really necessary).
 *
 * OProfile on the other hand likes to have samples taken periodically, so
 * for now we just piggyback the timer interrupt to get the expected
 * behavior.
 */
| 113 | |
/*
 * Notifier callback that hands the supplied register state to
 * oprofile_add_sample(), passing 0 as the second argument.
 *
 * @self: notifier block the callback was invoked through (unused)
 * @val:  value supplied by the notifier chain (unused)
 * @regs: opaque pointer, interpreted as a struct pt_regs *
 *
 * Always returns 0.
 */
static int sh7750_timer_notify(struct notifier_block *self,
			       unsigned long val, void *regs)
{
	struct pt_regs *sample_regs = regs;

	oprofile_add_sample(sample_regs, 0);

	return 0;
}
| 120 | |
| 121 | static struct notifier_block sh7750_timer_notifier = { |
| 122 | .notifier_call = sh7750_timer_notify, |
| 123 | }; |
| 124 | |
| 125 | static u64 sh7750_read_counter(int counter) |
| 126 | { |
| 127 | u32 hi, lo; |
| 128 | |
| 129 | hi = (counter == 0) ? ctrl_inl(PMCTR1H) : ctrl_inl(PMCTR2H); |
| 130 | lo = (counter == 0) ? ctrl_inl(PMCTR1L) : ctrl_inl(PMCTR2L); |
| 131 | |
| 132 | return (u64)((u64)(hi & 0xffff) << 32) | lo; |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * Files will be in a path like: |
| 137 | * |
| 138 | * /<oprofilefs mount point>/<counter number>/<file> |
| 139 | * |
| 140 | * So when dealing with <file>, we look to the parent dentry for the counter |
| 141 | * number. |
| 142 | */ |
| 143 | static inline int to_counter(struct file *file) |
| 144 | { |
Josef Sipek | c943c4b | 2006-12-08 02:37:38 -0800 | [diff] [blame] | 145 | const unsigned char *name = file->f_path.dentry->d_parent->d_name.name; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 | |
| 147 | return (int)simple_strtol(name, NULL, 10); |
| 148 | } |
| 149 | |
| 150 | /* |
| 151 | * XXX: We have 48-bit counters, so we're probably going to want something |
| 152 | * more along the lines of oprofilefs_ullong_to_user().. Truncating to |
| 153 | * unsigned long works fine for now though, as long as we don't attempt to |
| 154 | * profile for too horribly long. |
| 155 | */ |
| 156 | static ssize_t sh7750_read_count(struct file *file, char __user *buf, |
| 157 | size_t count, loff_t *ppos) |
| 158 | { |
| 159 | int counter = to_counter(file); |
| 160 | u64 val = sh7750_read_counter(counter); |
| 161 | |
| 162 | return oprofilefs_ulong_to_user((unsigned long)val, buf, count, ppos); |
| 163 | } |
| 164 | |
| 165 | static ssize_t sh7750_write_count(struct file *file, const char __user *buf, |
| 166 | size_t count, loff_t *ppos) |
| 167 | { |
| 168 | int counter = to_counter(file); |
| 169 | unsigned long val; |
| 170 | |
| 171 | if (oprofilefs_ulong_from_user(&val, buf, count)) |
| 172 | return -EFAULT; |
| 173 | |
| 174 | /* |
| 175 | * Any write will clear the counter, although only 0 should be |
| 176 | * written for this purpose, as we do not support setting the |
| 177 | * counter to an arbitrary value. |
| 178 | */ |
| 179 | WARN_ON(val != 0); |
| 180 | |
| 181 | if (counter == 0) { |
| 182 | ctrl_outw(ctrl_inw(PMCR1) | PMCR_PMCLR, PMCR1); |
| 183 | } else { |
| 184 | ctrl_outw(ctrl_inw(PMCR2) | PMCR_PMCLR, PMCR2); |
| 185 | } |
| 186 | |
| 187 | return count; |
| 188 | } |
| 189 | |
Arjan van de Ven | 5dfe4c9 | 2007-02-12 00:55:31 -0800 | [diff] [blame] | 190 | static const struct file_operations count_fops = { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 191 | .read = sh7750_read_count, |
| 192 | .write = sh7750_write_count, |
| 193 | }; |
| 194 | |
| 195 | static int sh7750_perf_counter_create_files(struct super_block *sb, struct dentry *root) |
| 196 | { |
| 197 | int i; |
| 198 | |
| 199 | for (i = 0; i < NR_CNTRS; i++) { |
| 200 | struct dentry *dir; |
Markus Armbruster | 0c6856f | 2006-06-26 00:24:34 -0700 | [diff] [blame] | 201 | char buf[4]; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 202 | |
| 203 | snprintf(buf, sizeof(buf), "%d", i); |
| 204 | dir = oprofilefs_mkdir(sb, root, buf); |
| 205 | |
| 206 | oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); |
| 207 | oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); |
| 208 | oprofilefs_create_file(sb, dir, "count", &count_fops); |
| 209 | |
| 210 | /* Dummy entries */ |
| 211 | oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); |
| 212 | oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); |
| 213 | oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); |
| 214 | } |
| 215 | |
| 216 | return 0; |
| 217 | } |
| 218 | |
| 219 | static int sh7750_perf_counter_start(void) |
| 220 | { |
| 221 | u16 pmcr; |
| 222 | |
| 223 | /* Enable counter 1 */ |
| 224 | if (ctr[0].enabled) { |
| 225 | pmcr = ctrl_inw(PMCR1); |
| 226 | WARN_ON(pmcr & PMCR_PMEN); |
| 227 | |
| 228 | pmcr &= ~PMCR_PMM_MASK; |
| 229 | pmcr |= ctr[0].event; |
| 230 | ctrl_outw(pmcr | PMCR_ENABLE, PMCR1); |
| 231 | } |
| 232 | |
| 233 | /* Enable counter 2 */ |
| 234 | if (ctr[1].enabled) { |
| 235 | pmcr = ctrl_inw(PMCR2); |
| 236 | WARN_ON(pmcr & PMCR_PMEN); |
| 237 | |
| 238 | pmcr &= ~PMCR_PMM_MASK; |
| 239 | pmcr |= ctr[1].event; |
| 240 | ctrl_outw(pmcr | PMCR_ENABLE, PMCR2); |
| 241 | } |
| 242 | |
| 243 | return register_profile_notifier(&sh7750_timer_notifier); |
| 244 | } |
| 245 | |
| 246 | static void sh7750_perf_counter_stop(void) |
| 247 | { |
| 248 | ctrl_outw(ctrl_inw(PMCR1) & ~PMCR_PMEN, PMCR1); |
| 249 | ctrl_outw(ctrl_inw(PMCR2) & ~PMCR_PMEN, PMCR2); |
| 250 | |
| 251 | unregister_profile_notifier(&sh7750_timer_notifier); |
| 252 | } |
| 253 | |
| 254 | static struct oprofile_operations sh7750_perf_counter_ops = { |
| 255 | .create_files = sh7750_perf_counter_create_files, |
| 256 | .start = sh7750_perf_counter_start, |
| 257 | .stop = sh7750_perf_counter_stop, |
| 258 | }; |
| 259 | |
| 260 | int __init oprofile_arch_init(struct oprofile_operations **ops) |
| 261 | { |
Paul Mundt | 11c1965 | 2006-12-25 10:19:56 +0900 | [diff] [blame] | 262 | if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER)) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 263 | return -ENODEV; |
| 264 | |
| 265 | sh7750_perf_counter_ops.cpu_type = (char *)get_cpu_subtype(); |
| 266 | *ops = &sh7750_perf_counter_ops; |
| 267 | |
| 268 | printk(KERN_INFO "oprofile: using SH-4 (%s) performance monitoring.\n", |
| 269 | sh7750_perf_counter_ops.cpu_type); |
| 270 | |
| 271 | /* Clear the counters */ |
| 272 | ctrl_outw(ctrl_inw(PMCR1) | PMCR_PMCLR, PMCR1); |
| 273 | ctrl_outw(ctrl_inw(PMCR2) | PMCR_PMCLR, PMCR2); |
| 274 | |
| 275 | return 0; |
| 276 | } |
| 277 | |
/* Architecture exit hook; this backend has nothing to tear down. */
void oprofile_arch_exit(void)
{
}
| 281 | |