Shrenuj Bansal | a419c79 | 2016-10-20 14:05:11 -0700 | [diff] [blame] | 1 | /* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved. |
| 2 | * |
| 3 | * This program is free software; you can redistribute it and/or modify |
| 4 | * it under the terms of the GNU General Public License version 2 and |
| 5 | * only version 2 as published by the Free Software Foundation. |
| 6 | * |
| 7 | * This program is distributed in the hope that it will be useful, |
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | * GNU General Public License for more details. |
| 11 | * |
| 12 | */ |
| 13 | |
| 14 | #include <linux/io.h> |
| 15 | #include "kgsl.h" |
| 16 | #include "adreno.h" |
| 17 | #include "kgsl_snapshot.h" |
| 18 | #include "a3xx_reg.h" |
| 19 | #include "adreno_snapshot.h" |
| 20 | #include "adreno_a3xx.h" |
| 21 | |
/*
 * A3XX register ranges captured on every snapshot.
 *
 * The table is a flat list of offset pairs: the first value of a pair is the
 * start offset and the second is the stop offset, which is dumped as well
 * (inclusive range). The blank-line grouping below is by address window and
 * is for readability only; it has no effect on how the table is consumed.
 */
static const unsigned int a3xx_registers[] = {
	/* 0x0000 - 0x01ff */
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9,
	0x01fc, 0x01ff,

	/* 0x0440 - 0x0614 */
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614,

	/* 0x0c01 - 0x0ce5 */
	0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, 0x0c48, 0x0c4b,
	0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, 0x0cc0, 0x0cc1,
	0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5,

	/* 0x0e41 - 0x0f09 */
	0x0e41, 0x0e45, 0x0e64, 0x0e65, 0x0e80, 0x0e82, 0x0e84, 0x0e89,
	0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0,
	0x0f00, 0x0f01, 0x0f03, 0x0f09,

	/* 0x2040 - 0x2343 */
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2240, 0x227e, 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce,
	0x22d0, 0x22d8, 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec,
	0x22f0, 0x22f7, 0x22ff, 0x22ff, 0x2340, 0x2343,

	/* 0x2440 - 0x2743 (same layout as the window above, 0x400 higher) */
	0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, 0x2468, 0x2469,
	0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, 0x2474, 0x2475,
	0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, 0x2500, 0x2509,
	0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, 0x2514, 0x2515,
	0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, 0x25f0, 0x25f0,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,

	/* 0x300c - 0x305f */
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
};
| 67 | |
/*
 * HLSQ register ranges, as (start, inclusive stop) offset pairs.
 *
 * These ranges are kept out of the main A3XX register list because reading
 * them may cause the device to hang. They are only dumped by
 * _snapshot_hlsq_regs(), after it has checked on the debug bus that the HLSQ
 * block is not busy.
 */
static const unsigned int a3xx_hlsq_registers[] = {
	/* 0x0e00 - 0x0e23 */
	0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,
	0x0e22, 0x0e23,

	/* 0x2200 - 0x221a */
	0x2200, 0x2212,
	0x2214, 0x2217,
	0x221a, 0x221a,

	/* 0x2600 - 0x261a */
	0x2600, 0x2612,
	0x2614, 0x2617,
	0x261a, 0x261a,
};
| 76 | |
/*
 * Extra register ranges dumped in addition to the common A3XX list when the
 * target is an A330 (the snapshot code also uses this list for A305B).
 * Same (start, inclusive stop) pair layout as the other register tables.
 */
static const unsigned int a330_registers[] = {
	0x01d0, 0x01d0,
	0x01d4, 0x01d4,
	0x0453, 0x0453,
};

/*
 * Upper bound, in words, on how much shader memory is copied into a
 * snapshot.
 */
#define SHADER_MEMORY_SIZE 0x4000
| 85 | |
| 86 | /** |
| 87 | * _rbbm_debug_bus_read - Helper function to read data from the RBBM |
| 88 | * debug bus. |
| 89 | * @device - GPU device to read/write registers |
| 90 | * @block_id - Debug bus block to read from |
| 91 | * @index - Index in the debug bus block to read |
| 92 | * @ret - Value of the register read |
| 93 | */ |
| 94 | static void _rbbm_debug_bus_read(struct kgsl_device *device, |
| 95 | unsigned int block_id, unsigned int index, unsigned int *val) |
| 96 | { |
| 97 | unsigned int block = (block_id << 8) | 1 << 16; |
| 98 | |
| 99 | kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); |
| 100 | kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); |
| 101 | } |
| 102 | |
| 103 | /** |
| 104 | * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader |
| 105 | * memory to the snapshot buffer. |
| 106 | * @device: GPU device whose shader memory is to be dumped |
| 107 | * @buf: Pointer to binary snapshot data blob being made |
| 108 | * @remain: Number of remaining bytes in the snapshot blob |
| 109 | * @priv: Unused parameter |
| 110 | * |
| 111 | */ |
| 112 | static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, |
| 113 | u8 *buf, size_t remain, void *priv) |
| 114 | { |
| 115 | struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; |
| 116 | unsigned int i; |
| 117 | unsigned int *data = (unsigned int *)(buf + sizeof(*header)); |
| 118 | unsigned int shader_read_len = SHADER_MEMORY_SIZE; |
| 119 | |
| 120 | if (shader_read_len > (device->shader_mem_len >> 2)) |
| 121 | shader_read_len = (device->shader_mem_len >> 2); |
| 122 | |
| 123 | if (remain < DEBUG_SECTION_SZ(shader_read_len)) { |
| 124 | SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); |
| 125 | return 0; |
| 126 | } |
| 127 | |
| 128 | header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; |
| 129 | header->size = shader_read_len; |
| 130 | |
| 131 | /* Map shader memory to kernel, for dumping */ |
| 132 | if (device->shader_mem_virt == NULL) |
| 133 | device->shader_mem_virt = devm_ioremap(device->dev, |
| 134 | device->shader_mem_phys, |
| 135 | device->shader_mem_len); |
| 136 | |
| 137 | if (device->shader_mem_virt == NULL) { |
| 138 | KGSL_DRV_ERR(device, |
| 139 | "Unable to map shader memory region\n"); |
| 140 | return 0; |
| 141 | } |
| 142 | |
| 143 | /* Now, dump shader memory to snapshot */ |
| 144 | for (i = 0; i < shader_read_len; i++) |
| 145 | adreno_shadermem_regread(device, i, &data[i]); |
| 146 | |
| 147 | |
| 148 | return DEBUG_SECTION_SZ(shader_read_len); |
| 149 | } |
| 150 | |
| 151 | static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, |
| 152 | u8 *buf, size_t remain, void *priv) |
| 153 | { |
| 154 | struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| 155 | |
| 156 | struct kgsl_snapshot_debugbus *header |
| 157 | = (struct kgsl_snapshot_debugbus *)buf; |
| 158 | struct adreno_debugbus_block *block = priv; |
| 159 | int i; |
| 160 | unsigned int *data = (unsigned int *)(buf + sizeof(*header)); |
| 161 | unsigned int dwords; |
| 162 | size_t size; |
| 163 | |
| 164 | /* |
| 165 | * For A305 and A320 all debug bus regions are the same size (0x40). For |
| 166 | * A330, they can be different sizes - most are still 0x40, but some |
| 167 | * like CP are larger |
| 168 | */ |
| 169 | |
| 170 | dwords = (adreno_is_a330(adreno_dev) || |
| 171 | adreno_is_a305b(adreno_dev)) ? |
| 172 | block->dwords : 0x40; |
| 173 | |
| 174 | size = (dwords * sizeof(unsigned int)) + sizeof(*header); |
| 175 | |
| 176 | if (remain < size) { |
| 177 | SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); |
| 178 | return 0; |
| 179 | } |
| 180 | |
| 181 | header->id = block->block_id; |
| 182 | header->count = dwords; |
| 183 | |
| 184 | for (i = 0; i < dwords; i++) |
| 185 | _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); |
| 186 | |
| 187 | return size; |
| 188 | } |
| 189 | |
| 190 | static struct adreno_debugbus_block debugbus_blocks[] = { |
| 191 | { RBBM_BLOCK_ID_CP, 0x52, }, |
| 192 | { RBBM_BLOCK_ID_RBBM, 0x40, }, |
| 193 | { RBBM_BLOCK_ID_VBIF, 0x40, }, |
| 194 | { RBBM_BLOCK_ID_HLSQ, 0x40, }, |
| 195 | { RBBM_BLOCK_ID_UCHE, 0x40, }, |
| 196 | { RBBM_BLOCK_ID_PC, 0x40, }, |
| 197 | { RBBM_BLOCK_ID_VFD, 0x40, }, |
| 198 | { RBBM_BLOCK_ID_VPC, 0x40, }, |
| 199 | { RBBM_BLOCK_ID_TSE, 0x40, }, |
| 200 | { RBBM_BLOCK_ID_RAS, 0x40, }, |
| 201 | { RBBM_BLOCK_ID_VSC, 0x40, }, |
| 202 | { RBBM_BLOCK_ID_SP_0, 0x40, }, |
| 203 | { RBBM_BLOCK_ID_SP_1, 0x40, }, |
| 204 | { RBBM_BLOCK_ID_SP_2, 0x40, }, |
| 205 | { RBBM_BLOCK_ID_SP_3, 0x40, }, |
| 206 | { RBBM_BLOCK_ID_TPL1_0, 0x40, }, |
| 207 | { RBBM_BLOCK_ID_TPL1_1, 0x40, }, |
| 208 | { RBBM_BLOCK_ID_TPL1_2, 0x40, }, |
| 209 | { RBBM_BLOCK_ID_TPL1_3, 0x40, }, |
| 210 | { RBBM_BLOCK_ID_RB_0, 0x40, }, |
| 211 | { RBBM_BLOCK_ID_RB_1, 0x40, }, |
| 212 | { RBBM_BLOCK_ID_RB_2, 0x40, }, |
| 213 | { RBBM_BLOCK_ID_RB_3, 0x40, }, |
| 214 | { RBBM_BLOCK_ID_MARB_0, 0x40, }, |
| 215 | { RBBM_BLOCK_ID_MARB_1, 0x40, }, |
| 216 | { RBBM_BLOCK_ID_MARB_2, 0x40, }, |
| 217 | { RBBM_BLOCK_ID_MARB_3, 0x40, }, |
| 218 | }; |
| 219 | |
| 220 | static void a3xx_snapshot_debugbus(struct kgsl_device *device, |
| 221 | struct kgsl_snapshot *snapshot) |
| 222 | { |
| 223 | int i; |
| 224 | |
| 225 | for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { |
| 226 | kgsl_snapshot_add_section(device, |
| 227 | KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, |
| 228 | a3xx_snapshot_debugbus_block, |
| 229 | (void *) &debugbus_blocks[i]); |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | static void _snapshot_hlsq_regs(struct kgsl_device *device, |
| 234 | struct kgsl_snapshot *snapshot) |
| 235 | { |
| 236 | struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| 237 | |
| 238 | /* |
| 239 | * Trying to read HLSQ registers when the HLSQ block is busy |
| 240 | * will cause the device to hang. The RBBM_DEBUG_BUS has information |
| 241 | * that will tell us if the HLSQ block is busy or not. Read values |
| 242 | * from the debug bus to ensure the HLSQ block is not busy (this |
| 243 | * is hardware dependent). If the HLSQ block is busy do not |
| 244 | * dump the registers, otherwise dump the HLSQ registers. |
| 245 | */ |
| 246 | |
| 247 | if (adreno_is_a330(adreno_dev)) { |
| 248 | /* |
| 249 | * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27] |
| 250 | * |
| 251 | * if (!stall_context_full) |
| 252 | * then dump HLSQ registers |
| 253 | */ |
| 254 | unsigned int stall_context_full = 0; |
| 255 | |
| 256 | _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49, |
| 257 | &stall_context_full); |
| 258 | stall_context_full &= 0x08000000; |
| 259 | |
| 260 | if (stall_context_full) |
| 261 | return; |
| 262 | } else { |
| 263 | /* |
| 264 | * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] |
| 265 | * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] |
| 266 | * |
| 267 | * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) |
| 268 | * then dump HLSQ registers |
| 269 | */ |
| 270 | unsigned int next_pif = 0; |
| 271 | |
| 272 | /* check tpif */ |
| 273 | _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); |
| 274 | next_pif &= 0x1f; |
| 275 | if (next_pif != 0 && next_pif != 1 && next_pif != 28) |
| 276 | return; |
| 277 | |
| 278 | /* check spif */ |
| 279 | _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); |
| 280 | next_pif &= 0x3f; |
| 281 | if (next_pif != 0 && next_pif != 1 && next_pif != 10) |
| 282 | return; |
| 283 | } |
| 284 | |
| 285 | SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); |
| 286 | } |
| 287 | |
| 288 | /* |
| 289 | * a3xx_snapshot() - A3XX GPU snapshot function |
| 290 | * @adreno_dev: Device being snapshotted |
| 291 | * @snapshot: Snapshot meta data |
| 292 | * @remain: Amount of space left in snapshot memory |
| 293 | * |
| 294 | * This is where all of the A3XX specific bits and pieces are grabbed |
| 295 | * into the snapshot memory |
| 296 | */ |
| 297 | void a3xx_snapshot(struct adreno_device *adreno_dev, |
| 298 | struct kgsl_snapshot *snapshot) |
| 299 | { |
| 300 | struct kgsl_device *device = KGSL_DEVICE(adreno_dev); |
| 301 | struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); |
| 302 | struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; |
| 303 | unsigned int reg; |
| 304 | |
| 305 | /* Disable Clock gating temporarily for the debug bus to work */ |
| 306 | adreno_writereg(adreno_dev, ADRENO_REG_RBBM_CLOCK_CTL, 0x00); |
| 307 | |
| 308 | SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); |
| 309 | |
| 310 | _snapshot_hlsq_regs(device, snapshot); |
| 311 | |
| 312 | if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev)) |
| 313 | SNAPSHOT_REGISTERS(device, snapshot, a330_registers); |
| 314 | |
| 315 | kgsl_snapshot_indexed_registers(device, snapshot, |
| 316 | A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, |
| 317 | 0x0, snap_data->sect_sizes->cp_pfp); |
| 318 | |
| 319 | /* CP_ME indexed registers */ |
| 320 | kgsl_snapshot_indexed_registers(device, snapshot, |
| 321 | A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); |
| 322 | |
| 323 | /* VPC memory */ |
| 324 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 325 | snapshot, adreno_snapshot_vpc_memory, |
| 326 | &snap_data->sect_sizes->vpc_mem); |
| 327 | |
| 328 | /* CP MEQ */ |
| 329 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, |
| 330 | adreno_snapshot_cp_meq, &snap_data->sect_sizes->cp_meq); |
| 331 | |
| 332 | /* Shader working/shadow memory */ |
| 333 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 334 | snapshot, a3xx_snapshot_shader_memory, |
| 335 | &snap_data->sect_sizes->shader_mem); |
| 336 | |
| 337 | |
| 338 | /* CP PFP and PM4 */ |
| 339 | |
| 340 | /* |
| 341 | * Reading the microcode while the CP is running will |
| 342 | * basically move the CP instruction pointer to |
| 343 | * whatever address we read. Big badaboom ensues. Stop the CP |
| 344 | * (if it isn't already stopped) to ensure that we are safe. |
| 345 | * We do this here and not earlier to avoid corrupting the RBBM |
| 346 | * status and CP registers - by the time we get here we don't |
| 347 | * care about the contents of the CP anymore. |
| 348 | */ |
| 349 | |
| 350 | adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); |
| 351 | reg |= (1 << 27) | (1 << 28); |
| 352 | adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); |
| 353 | |
| 354 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 355 | snapshot, adreno_snapshot_cp_pfp_ram, NULL); |
| 356 | |
| 357 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 358 | snapshot, adreno_snapshot_cp_pm4_ram, NULL); |
| 359 | |
| 360 | /* CP ROQ */ |
| 361 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 362 | snapshot, adreno_snapshot_cp_roq, &snap_data->sect_sizes->roq); |
| 363 | |
| 364 | if (snap_data->sect_sizes->cp_merciu) { |
| 365 | kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, |
| 366 | snapshot, adreno_snapshot_cp_merciu, |
| 367 | &snap_data->sect_sizes->cp_merciu); |
| 368 | } |
| 369 | |
| 370 | a3xx_snapshot_debugbus(device, snapshot); |
| 371 | } |