Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /** |
| 2 | * @file arch/alpha/oprofile/op_model_ev67.c |
| 3 | * |
| 4 | * @remark Copyright 2002 OProfile authors |
| 5 | * @remark Read the file COPYING |
| 6 | * |
| 7 | * @author Richard Henderson <rth@twiddle.net> |
| 8 | * @author Falk Hueffner <falk@debian.org> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/oprofile.h> |
| 12 | #include <linux/init.h> |
| 13 | #include <linux/smp.h> |
| 14 | #include <asm/ptrace.h> |
| 15 | #include <asm/system.h> |
| 16 | |
| 17 | #include "op_impl.h" |
| 18 | |
| 19 | |
| 20 | /* Compute all of the registers in preparation for enabling profiling. */ |
| 21 | |
| 22 | static void |
| 23 | ev67_reg_setup(struct op_register_config *reg, |
| 24 | struct op_counter_config *ctr, |
| 25 | struct op_system_config *sys) |
| 26 | { |
| 27 | unsigned long ctl, reset, need_reset, i; |
| 28 | |
| 29 | /* Select desired events. */ |
| 30 | ctl = 1UL << 4; /* Enable ProfileMe mode. */ |
| 31 | |
| 32 | /* The event numbers are chosen so we can use them directly if |
| 33 | PCTR1 is enabled. */ |
| 34 | if (ctr[1].enabled) { |
| 35 | ctl |= (ctr[1].event & 3) << 2; |
| 36 | } else { |
| 37 | if (ctr[0].event == 0) /* cycles */ |
| 38 | ctl |= 1UL << 2; |
| 39 | } |
| 40 | reg->mux_select = ctl; |
| 41 | |
| 42 | /* Select logging options. */ |
| 43 | /* ??? Need to come up with some mechanism to trace only |
| 44 | selected processes. EV67 does not have a mechanism to |
| 45 | select kernel or user mode only. For now, enable always. */ |
| 46 | reg->proc_mode = 0; |
| 47 | |
| 48 | /* EV67 cannot change the width of the counters as with the |
| 49 | other implementations. But fortunately, we can write to |
| 50 | the counters and set the value such that it will overflow |
| 51 | at the right time. */ |
| 52 | reset = need_reset = 0; |
| 53 | for (i = 0; i < 2; ++i) { |
| 54 | unsigned long count = ctr[i].count; |
| 55 | if (!ctr[i].enabled) |
| 56 | continue; |
| 57 | |
| 58 | if (count > 0x100000) |
| 59 | count = 0x100000; |
| 60 | ctr[i].count = count; |
| 61 | reset |= (0x100000 - count) << (i ? 6 : 28); |
| 62 | if (count != 0x100000) |
| 63 | need_reset |= 1 << i; |
| 64 | } |
| 65 | reg->reset_values = reset; |
| 66 | reg->need_reset = need_reset; |
| 67 | } |
| 68 | |
| 69 | /* Program all of the registers in preparation for enabling profiling. */ |
| 70 | |
| 71 | static void |
| 72 | ev67_cpu_setup (void *x) |
| 73 | { |
| 74 | struct op_register_config *reg = x; |
| 75 | |
| 76 | wrperfmon(2, reg->mux_select); |
| 77 | wrperfmon(3, reg->proc_mode); |
| 78 | wrperfmon(6, reg->reset_values | 3); |
| 79 | } |
| 80 | |
| 81 | /* CTR is a counter for which the user has requested an interrupt count |
| 82 | in between one of the widths selectable in hardware. Reset the count |
| 83 | for CTR to the value stored in REG->RESET_VALUES. */ |
| 84 | |
| 85 | static void |
| 86 | ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr) |
| 87 | { |
| 88 | wrperfmon(6, reg->reset_values | (1 << ctr)); |
| 89 | } |
| 90 | |
| 91 | /* ProfileMe conditions which will show up as counters. We can also |
| 92 | detect the following, but it seems unlikely that anybody is |
| 93 | interested in counting them: |
| 94 | * Reset |
| 95 | * MT_FPCR (write to floating point control register) |
| 96 | * Arithmetic trap |
| 97 | * Dstream Fault |
| 98 | * Machine Check (ECC fault, etc.) |
| 99 | * OPCDEC (illegal opcode) |
| 100 | * Floating point disabled |
| 101 | * Differentiate between DTB single/double misses and 3 or 4 level |
| 102 | page tables |
| 103 | * Istream access violation |
| 104 | * Interrupt |
| 105 | * Icache Parity Error. |
| 106 | * Instruction killed (nop, trapb) |
| 107 | |
| 108 | Unfortunately, there seems to be no way to detect Dcache and Bcache |
| 109 | misses; the latter could be approximated by making the counter |
| 110 | count Bcache misses, but that is not precise. |
| 111 | |
| 112 | We model this as 20 counters: |
| 113 | * PCTR0 |
| 114 | * PCTR1 |
| 115 | * 9 ProfileMe events, induced by PCTR0 |
| 116 | * 9 ProfileMe events, induced by PCTR1 |
| 117 | */ |
| 118 | |
/* ProfileMe events exposed as pseudo-counters.  The numeric values are
   used as offsets when computing a pseudo-counter index, so they must
   stay dense and start at zero; PM_NUM_COUNTERS is the number of
   events, not an event itself.  */
enum profileme_counters {
	PM_STALLED	= 0,	/* Stalled for at least one cycle between
				   the fetch and map stages */
	PM_TAKEN	= 1,	/* Conditional branch taken */
	PM_MISPREDICT	= 2,	/* Branch caused mispredict trap */
	PM_ITB_MISS	= 3,	/* ITB miss */
	PM_DTB_MISS	= 4,	/* DTB miss */
	PM_REPLAY	= 5,	/* Replay trap */
	PM_LOAD_STORE	= 6,	/* Load-store order trap */
	PM_ICACHE_MISS	= 7,	/* Icache miss */
	PM_UNALIGNED	= 8,	/* Unaligned Load/Store */
	PM_NUM_COUNTERS	= 9
};
| 132 | |
| 133 | static inline void |
| 134 | op_add_pm(unsigned long pc, int kern, unsigned long counter, |
| 135 | struct op_counter_config *ctr, unsigned long event) |
| 136 | { |
| 137 | unsigned long fake_counter = 2 + event; |
| 138 | if (counter == 1) |
| 139 | fake_counter += PM_NUM_COUNTERS; |
| 140 | if (ctr[fake_counter].enabled) |
| 141 | oprofile_add_pc(pc, kern, fake_counter); |
| 142 | } |
| 143 | |
| 144 | static void |
| 145 | ev67_handle_interrupt(unsigned long which, struct pt_regs *regs, |
| 146 | struct op_counter_config *ctr) |
| 147 | { |
| 148 | unsigned long pmpc, pctr_ctl; |
| 149 | int kern = !user_mode(regs); |
| 150 | int mispredict = 0; |
| 151 | union { |
| 152 | unsigned long v; |
| 153 | struct { |
| 154 | unsigned reserved: 30; /* 0-29 */ |
| 155 | unsigned overcount: 3; /* 30-32 */ |
| 156 | unsigned icache_miss: 1; /* 33 */ |
| 157 | unsigned trap_type: 4; /* 34-37 */ |
| 158 | unsigned load_store: 1; /* 38 */ |
| 159 | unsigned trap: 1; /* 39 */ |
| 160 | unsigned mispredict: 1; /* 40 */ |
| 161 | } fields; |
| 162 | } i_stat; |
| 163 | |
| 164 | enum trap_types { |
| 165 | TRAP_REPLAY, |
| 166 | TRAP_INVALID0, |
| 167 | TRAP_DTB_DOUBLE_MISS_3, |
| 168 | TRAP_DTB_DOUBLE_MISS_4, |
| 169 | TRAP_FP_DISABLED, |
| 170 | TRAP_UNALIGNED, |
| 171 | TRAP_DTB_SINGLE_MISS, |
| 172 | TRAP_DSTREAM_FAULT, |
| 173 | TRAP_OPCDEC, |
| 174 | TRAP_INVALID1, |
| 175 | TRAP_MACHINE_CHECK, |
| 176 | TRAP_INVALID2, |
| 177 | TRAP_ARITHMETIC, |
| 178 | TRAP_INVALID3, |
| 179 | TRAP_MT_FPCR, |
| 180 | TRAP_RESET |
| 181 | }; |
| 182 | |
| 183 | pmpc = wrperfmon(9, 0); |
| 184 | /* ??? Don't know how to handle physical-mode PALcode address. */ |
| 185 | if (pmpc & 1) |
| 186 | return; |
| 187 | pmpc &= ~2; /* clear reserved bit */ |
| 188 | |
| 189 | i_stat.v = wrperfmon(8, 0); |
| 190 | if (i_stat.fields.trap) { |
| 191 | switch (i_stat.fields.trap_type) { |
| 192 | case TRAP_INVALID1: |
| 193 | case TRAP_INVALID2: |
| 194 | case TRAP_INVALID3: |
| 195 | /* Pipeline redirection ocurred. PMPC points |
| 196 | to PALcode. Recognize ITB miss by PALcode |
| 197 | offset address, and get actual PC from |
| 198 | EXC_ADDR. */ |
| 199 | oprofile_add_pc(regs->pc, kern, which); |
| 200 | if ((pmpc & ((1 << 15) - 1)) == 581) |
| 201 | op_add_pm(regs->pc, kern, which, |
| 202 | ctr, PM_ITB_MISS); |
| 203 | /* Most other bit and counter values will be |
| 204 | those for the first instruction in the |
| 205 | fault handler, so we're done. */ |
| 206 | return; |
| 207 | case TRAP_REPLAY: |
| 208 | op_add_pm(pmpc, kern, which, ctr, |
| 209 | (i_stat.fields.load_store |
| 210 | ? PM_LOAD_STORE : PM_REPLAY)); |
| 211 | break; |
| 212 | case TRAP_DTB_DOUBLE_MISS_3: |
| 213 | case TRAP_DTB_DOUBLE_MISS_4: |
| 214 | case TRAP_DTB_SINGLE_MISS: |
| 215 | op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS); |
| 216 | break; |
| 217 | case TRAP_UNALIGNED: |
| 218 | op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED); |
| 219 | break; |
| 220 | case TRAP_INVALID0: |
| 221 | case TRAP_FP_DISABLED: |
| 222 | case TRAP_DSTREAM_FAULT: |
| 223 | case TRAP_OPCDEC: |
| 224 | case TRAP_MACHINE_CHECK: |
| 225 | case TRAP_ARITHMETIC: |
| 226 | case TRAP_MT_FPCR: |
| 227 | case TRAP_RESET: |
| 228 | break; |
| 229 | } |
| 230 | |
| 231 | /* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR |
| 232 | mispredicts do not set this bit but can be |
| 233 | recognized by the presence of one of these |
| 234 | instructions at the PMPC location with bit 39 |
| 235 | set. */ |
| 236 | if (i_stat.fields.mispredict) { |
| 237 | mispredict = 1; |
| 238 | op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT); |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | oprofile_add_pc(pmpc, kern, which); |
| 243 | |
| 244 | pctr_ctl = wrperfmon(5, 0); |
| 245 | if (pctr_ctl & (1UL << 27)) |
| 246 | op_add_pm(pmpc, kern, which, ctr, PM_STALLED); |
| 247 | |
| 248 | /* Unfortunately, TAK is undefined on mispredicted branches. |
| 249 | ??? It is also undefined for non-cbranch insns, should |
| 250 | check that. */ |
| 251 | if (!mispredict && pctr_ctl & (1UL << 0)) |
| 252 | op_add_pm(pmpc, kern, which, ctr, PM_TAKEN); |
| 253 | } |
| 254 | |
| 255 | struct op_axp_model op_model_ev67 = { |
| 256 | .reg_setup = ev67_reg_setup, |
| 257 | .cpu_setup = ev67_cpu_setup, |
| 258 | .reset_ctr = ev67_reset_ctr, |
| 259 | .handle_interrupt = ev67_handle_interrupt, |
| 260 | .cpu_type = "alpha/ev67", |
| 261 | .num_counters = 20, |
| 262 | .can_set_proc_mode = 0, |
| 263 | }; |