Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2010, 2013 Intel Corporation |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 21 | * DEALINGS IN THE SOFTWARE. |
| 22 | * |
| 23 | * Authors: |
| 24 | * Eric Anholt <eric@anholt.net> |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 25 | * Kenneth Graunke <kenneth@whitecape.org> |
| 26 | * |
| 27 | * While documentation for performance counters is suspiciously missing from the |
| 28 | * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM. |
| 29 | * |
| 30 | * A lot of the Ironlake PRM actually unintentionally documents Sandybridge |
| 31 | * due to mistakes made when updating the documentation for Gen6+. Many of |
| 32 | * these mislabeled sections carried forward to the public documentation. |
| 33 | * |
| 34 | * The Ironlake PRMs have been publicly available since 2010 and are online at: |
| 35 | * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 36 | */ |
| 37 | |
#include <err.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "drm.h"
#include "i915_drm.h"
#include "drmtest.h"
#include "intel_gpu_tools.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
| 51 | |
#define GEN5_COUNTER_COUNT 29

/*
 * Display names for the Ironlake (Gen5) counters.
 *
 * Entry i labels totals[i], i.e. the i-th value that gen5_get_counters()
 * reads from the report buffer.
 */
const char *gen5_counter_names[GEN5_COUNTER_COUNT] = {
	/* Fixed-function unit starve/stall cycles */
	[0]  = "cycles the CS unit is starved",
	[1]  = "cycles the CS unit is stalled",
	[2]  = "cycles the VF unit is starved",
	[3]  = "cycles the VF unit is stalled",
	[4]  = "cycles the VS unit is starved",
	[5]  = "cycles the VS unit is stalled",
	[6]  = "cycles the GS unit is starved",
	[7]  = "cycles the GS unit is stalled",
	[8]  = "cycles the CL unit is starved",
	[9]  = "cycles the CL unit is stalled",
	[10] = "cycles the SF unit is starved",
	[11] = "cycles the SF unit is stalled",
	[12] = "cycles the WZ unit is starved",
	[13] = "cycles the WZ unit is stalled",

	/* Remaining counters */
	[14] = "Z buffer read/write ",
	[15] = "cycles each EU was active ",
	[16] = "cycles each EU was suspended ",
	[17] = "cycles threads loaded all EUs",
	[18] = "cycles filtering active ",
	[19] = "cycles PS threads executed ",
	[20] = "subspans written to RC ",
	[21] = "bytes read for texture reads ",
	[22] = "texels returned from sampler ",
	[23] = "polygons not culled ",
	[24] = "clocks MASF has valid message",
	[25] = "64b writes/reads from RC ",
	[26] = "reads on dataport ",
	[27] = "clocks MASF has valid msg not consumed by sampler",
	[28] = "cycles any EU is stalled for math",
};
| 85 | |
#define GEN6_COUNTER_COUNT 29

/**
 * Sandybridge report layout, Counter Select = 001:
 *
 *   A0   A1   A2   A3   A4   TIMESTAMP  RPT_ID
 *   A5   A6   A7   A8   A9   A10  A11  A12
 *   A13  A14  A15  A16  A17  A18  A19  A20
 *   A21  A22  A23  A24  A25  A26  A27  A28
 *
 * gen6_get_counters() reads A0 from dword 7 and A4 from dword 3 of the
 * report, A5 from dword 15 and A12 from dword 8, and so on (see its
 * buffer_index[] table).
 */
const int gen6_counter_format = 0x1; /* 0b001 */

/**
 * Display names for the Sandybridge aggregating counters A0-A28, indexed by
 * counter number.
 *
 * The Ironlake PRM states that 29 counters (A0-A28) exist but names only 28
 * of them.  The listed names follow a pattern: the GS group and the PS group
 * each have five counters (Active, Stall, Core Stall, # threads loaded,
 * ready but not running), while the VS group lists only four - its
 * "# threads loaded" counter is absent.  This table presumes that counter
 * does exist as A5 and that every later name is shifted up by one.
 */
const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
	/* Whole core array */
	[0]  = "Aggregated Core Array Active",
	[1]  = "Aggregated Core Array Stalled",

	/* Vertex shader */
	[2]  = "Vertex Shader Active Time",
	[3]  = "Vertex Shader Stall Time",
	[4]  = "Vertex Shader Stall Time - Core Stall",
	[5]  = "# VS threads loaded",
	[6]  = "Vertex Shader Ready but not running time",

	/* Geometry shader */
	[7]  = "Geometry Shader Active Time",
	[8]  = "Geometry Shader Stall Time",
	[9]  = "Geometry Shader Stall Time - Core Stall",
	[10] = "# GS threads loaded",
	[11] = "Geometry Shader ready but not running Time",

	/* Pixel shader */
	[12] = "Pixel Shader Active Time",
	[13] = "Pixel Shader Stall Time",
	[14] = "Pixel Shader Stall Time - Core Stall",
	[15] = "# PS threads loaded",
	[16] = "Pixel Shader ready but not running Time",

	/* Per-pixel tests */
	[17] = "Early Z Test Pixels Passing",
	[18] = "Early Z Test Pixels Failing",
	[19] = "Early Stencil Test Pixels Passing",
	[20] = "Early Stencil Test Pixels Failing",
	[21] = "Pixel Kill Count",
	[22] = "Alpha Test Pixels Failed",
	[23] = "Post PS Stencil Pixels Failed",
	[24] = "Post PS Z buffer Pixels Failed",
	[25] = "Pixels/samples Written in the frame buffer",

	/* Miscellaneous */
	[26] = "GPU Busy",
	[27] = "CL active and not stalled",
	[28] = "SF active and stalled",
};
| 139 | |
#define GEN7_COUNTER_COUNT 44

/**
 * Display names for the Ivybridge aggregating counters A0-A43, indexed by
 * counter number.
 *
 * Slots without an initializer are NULL and stand for "Reserved" counters;
 * the sampling and printing code skips them.  The report layout documented
 * next to gen7_counter_format also contains A44, which this 44-entry table
 * does not cover.
 */
const char *gen7_counter_names[GEN7_COUNTER_COUNT] = {
	/*
	 * A0: Sum of all cycles on all cores actively executing instructions.
	 * Time taken to service Send instructions is not counted here; that
	 * time is considered by the shader active counters to give the result.
	 */
	[0] = "Aggregated Core Array Active",

	/*
	 * A1: Sum of all cycles on all cores where the EU is not idle and is
	 * not actively executing ISA instructions.  Generally this means all
	 * loaded threads on the EU are stalled on some data dependency, but
	 * it also includes the time during which the TS is loading the thread
	 * dispatch header into the EU prior to thread execution and no other
	 * thread is fully loaded.
	 */
	[1] = "Aggregated Core Array Stalled",

	/* A2: Total clocks the vertex shader spent active on all cores. */
	[2] = "Vertex Shader Active Time",

	/*
	 * A4: Total clocks the vertex shader spent stalled on all cores while
	 * the entire core was stalled as well.
	 */
	[4] = "Vertex Shader Stall Time - Core Stall",

	/* A5: Number of VS threads loaded at any given time in the EUs. */
	[5] = "# VS threads loaded",

	/* A7: Total clocks the Hull shader spent active on all cores. */
	[7] = "Hull Shader Active Time",

	/*
	 * A9: Total clocks the Hull shader spent stalled on all cores while
	 * the entire core was stalled as well.
	 */
	[9] = "Hull Shader Stall Time - Core Stall",

	/* A10: Number of HS threads loaded at any given time in the EUs. */
	[10] = "# HS threads loaded",

	/* A12: Total clocks the Domain shader spent active on all cores. */
	[12] = "Domain Shader Active Time",

	/*
	 * A14: Total clocks the domain shader spent stalled on all cores
	 * while the entire core was stalled as well.
	 */
	[14] = "Domain Shader Stall Time - Core Stall",

	/* A15: Number of DS threads loaded at any given time in the EUs. */
	[15] = "# DS threads loaded",

	/* A17: Total clocks the compute shader spent active on all cores. */
	[17] = "Compute Shader Active Time",

	/*
	 * A19: Total clocks the compute shader spent stalled on all cores
	 * while the entire core was stalled as well.
	 */
	[19] = "Compute Shader Stall Time - Core Stall",

	/* A20: Number of CS threads loaded at any given time in the EUs. */
	[20] = "# CS threads loaded",

	/* A22: Total clocks the geometry shader spent active on all cores. */
	[22] = "Geometry Shader Active Time",

	/*
	 * A24: Total clocks the geometry shader spent stalled on all cores
	 * while the entire core was stalled as well.
	 */
	[24] = "Geometry Shader Stall Time - Core Stall",

	/* A25: Number of GS threads loaded at any time in the EUs. */
	[25] = "# GS threads loaded",

	/* A27: Total clocks the pixel shader spent active on all cores. */
	[27] = "Pixel Shader Active Time",

	/*
	 * A29: Total clocks the pixel shader spent stalled on all cores while
	 * the entire core was stalled as well.
	 */
	[29] = "Pixel Shader Stall Time - Core Stall",

	/* A30: Number of PS threads loaded at any given time in the EUs. */
	[30] = "# PS threads loaded",

	/* A32: Count of pixels that pass the fast check (8x8). */
	[32] = "HiZ Fast Z Test Pixels Passing",

	/* A33: Count of pixels that fail the fast check (8x8). */
	[33] = "HiZ Fast Z Test Pixels Failing",

	/* A34: Count of pixels passing the slow check (2x2). */
	[34] = "Slow Z Test Pixels Passing",

	/* A35: Count of pixels that fail the slow check (2x2). */
	[35] = "Slow Z Test Pixels Failing",

	/*
	 * A36: Number of pixels/samples killed in the pixel shader.
	 * Ivybridge/Baytrail erratum: the reported count is 2X the actual
	 * count for dual source render target messages, i.e. when the PS has
	 * two output colors.
	 */
	[36] = "Pixel Kill Count",

	/*
	 * A37: Number of pixels/samples that fail alpha-test.  Alpha to
	 * coverage may have some challenges in per-pixel invocation.
	 */
	[37] = "Alpha Test Pixels Failed",

	/*
	 * A38: Number of pixels/samples failing stencil test after the pixel
	 * shader has executed.
	 */
	[38] = "Post PS Stencil Pixels Failed",

	/*
	 * A39: Number of pixels/samples failing Z test after the pixel shader
	 * has executed.
	 */
	[39] = "Post PS Z buffer Pixels Failed",

	/*
	 * A40: Number of render target writes.  MRT scenarios will cause this
	 * counter to increment multiple times.
	 */
	[40] = "3D/GPGPU Render Target Writes",

	/*
	 * A41: Render engine is not idle.
	 *
	 * The GPU Busy aggregate counter doesn't increment under the
	 * following conditions:
	 *
	 * 1. Context Switch in Progress.
	 * 2. GPU stalled on executing MI_WAIT_FOR_EVENT.
	 * 3. GPU stalled on executing MI_SEMAPHORE_MBOX.
	 * 4. RCS idle but other parts of GPU active (e.g. only media engines
	 *    active).
	 */
	[41] = "Render Engine Busy",

	/*
	 * A42: VSunit is stalling VF (upstream unit) and starving HS
	 * (downstream unit).
	 */
	[42] = "VS bottleneck",

	/*
	 * A43: GSunit is stalling DS (upstream unit) and starving SOL
	 * (downstream unit).
	 */
	[43] = "GS bottleneck",
};

/**
 * Ivybridge report layout, Counter Select = 101:
 *
 *   A4   A3   A2   A1   A0   TIMESTAMP  ReportID
 *   A12  A11  A10  A9   A8   A7   A6   A5
 *   A20  A19  A18  A17  A16  A15  A14  A13
 *   A28  A27  A26  A25  A24  A23  A22  A21
 *   A36  A35  A34  A33  A32  A31  A30  A29
 *   A44  A43  A42  A41  A40  A39  A38  A37
 *   C3   C2   C1   C0   B3   B2   B1   B0
 *   C11  C10  C9   C8   C7   C6   C5   C4
 */
const int gen7_counter_format = 0x5; /* 0b101 */
| 296 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 297 | int have_totals = 0; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 298 | uint32_t *totals; |
| 299 | uint32_t *last_counter; |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 300 | static drm_intel_bufmgr *bufmgr; |
| 301 | struct intel_batchbuffer *batch; |
| 302 | |
/*
 * Ironlake (Gen5) MI_REPORT_PERF_COUNT.
 *
 * DW0: opcode 0x26 in bits 23 and up, with the length field (3 - 2) already
 *      folded in; OR in one of the MI_COUNTER_SET_* selectors.
 */
#define GEN5_MI_REPORT_PERF_COUNT	((0x26 << 23) | (3 - 2))
#define MI_COUNTER_SET_0		(0 << 6)
#define MI_COUNTER_SET_1		(1 << 6)
/* DW1: destination address; bit 0 is the GTT address flag. */
#define MI_COUNTER_ADDRESS_GTT		(1 << 0)
/* DW2: report ID */

/**
 * Sandybridge and later MI_REPORT_PERF_COUNT.
 *
 * The Sandybridge PRM (Volume 1, Part 1, page 48) moves the command to
 * opcode 0x28; how it works is detailed in the Ironlake PRM, Volume 1,
 * Part 3.
 *
 * DW0: opcode only - unlike the Gen5 macro, the length field is NOT included
 *      and must be OR'ed in by the user of the macro.
 * DW1 and DW2 are the same as for Gen5.
 */
#define GEN6_MI_REPORT_PERF_COUNT	(0x28 << 23)

/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM. */
#define OACONTROL				0x2360
#define OACONTROL_COUNTER_SELECT_SHIFT		2	/* counter format field */
#define PERFORMANCE_COUNTER_ENABLE		(1 << 0)	/* enable bit */
| 324 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 325 | static void |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 326 | gen5_get_counters(void) |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 327 | { |
| 328 | int i; |
| 329 | drm_intel_bo *stats_bo; |
| 330 | uint32_t *stats_result; |
| 331 | |
| 332 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 333 | |
| 334 | BEGIN_BATCH(6); |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 335 | OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | MI_COUNTER_SET_0); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 336 | OUT_RELOC(stats_bo, |
| 337 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 338 | 0); |
| 339 | OUT_BATCH(0); |
| 340 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 341 | OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | MI_COUNTER_SET_1); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 342 | OUT_RELOC(stats_bo, |
| 343 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 344 | 64); |
| 345 | OUT_BATCH(0); |
| 346 | |
| 347 | ADVANCE_BATCH(); |
| 348 | |
| 349 | intel_batchbuffer_flush(batch); |
| 350 | |
| 351 | drm_intel_bo_map(stats_bo, 0); |
| 352 | stats_result = stats_bo->virtual; |
| 353 | /* skip REPORT_ID, TIMESTAMP */ |
| 354 | stats_result += 3; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 355 | for (i = 0 ; i < GEN5_COUNTER_COUNT; i++) { |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 356 | totals[i] += stats_result[i] - last_counter[i]; |
| 357 | last_counter[i] = stats_result[i]; |
| 358 | } |
| 359 | |
| 360 | drm_intel_bo_unmap(stats_bo); |
| 361 | drm_intel_bo_unreference(stats_bo); |
| 362 | } |
| 363 | |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 364 | static void |
| 365 | gen6_get_counters(void) |
| 366 | { |
| 367 | int i; |
| 368 | drm_intel_bo *stats_bo; |
| 369 | uint32_t *stats_result; |
| 370 | |
| 371 | /* Map from counter names to their index in the buffer object */ |
| 372 | static const int buffer_index[GEN6_COUNTER_COUNT] = |
| 373 | { |
| 374 | 7, 6, 5, 4, 3, |
| 375 | 15, 14, 13, 12, 11, 10, 9, 8, |
| 376 | 23, 22, 21, 20, 19, 18, 17, 16, |
| 377 | 31, 30, 29, 28, 27, 26, 25, 24, |
| 378 | }; |
| 379 | |
| 380 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 381 | |
| 382 | BEGIN_BATCH(3); |
| 383 | OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); |
| 384 | OUT_RELOC(stats_bo, |
| 385 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 386 | MI_COUNTER_ADDRESS_GTT); |
| 387 | OUT_BATCH(0); |
| 388 | ADVANCE_BATCH(); |
| 389 | |
| 390 | intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); |
| 391 | |
| 392 | drm_intel_bo_map(stats_bo, 0); |
| 393 | stats_result = stats_bo->virtual; |
| 394 | for (i = 0; i < GEN6_COUNTER_COUNT; i++) { |
| 395 | totals[i] += stats_result[buffer_index[i]] - last_counter[i]; |
| 396 | last_counter[i] = stats_result[buffer_index[i]]; |
| 397 | } |
| 398 | |
| 399 | drm_intel_bo_unmap(stats_bo); |
| 400 | drm_intel_bo_unreference(stats_bo); |
| 401 | } |
| 402 | |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 403 | static void |
| 404 | gen7_get_counters(void) |
| 405 | { |
| 406 | int i; |
| 407 | drm_intel_bo *stats_bo; |
| 408 | uint32_t *stats_result; |
| 409 | |
| 410 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 411 | |
| 412 | BEGIN_BATCH(3); |
| 413 | OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); |
| 414 | OUT_RELOC(stats_bo, |
| 415 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); |
| 416 | OUT_BATCH(0); |
| 417 | ADVANCE_BATCH(); |
| 418 | |
| 419 | intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); |
| 420 | |
| 421 | drm_intel_bo_map(stats_bo, 0); |
| 422 | stats_result = stats_bo->virtual; |
| 423 | /* skip REPORT_ID, TIMESTAMP */ |
| 424 | stats_result += 3; |
| 425 | for (i = 0; i < GEN7_COUNTER_COUNT; i++) { |
| 426 | /* Ignore "Reserved" counters */ |
| 427 | if (!gen7_counter_names[i]) |
| 428 | continue; |
| 429 | totals[i] += stats_result[i] - last_counter[i]; |
| 430 | last_counter[i] = stats_result[i]; |
| 431 | } |
| 432 | |
| 433 | drm_intel_bo_unmap(stats_bo); |
| 434 | drm_intel_bo_unreference(stats_bo); |
| 435 | } |
| 436 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 437 | #define STATS_CHECK_FREQUENCY 100 |
| 438 | #define STATS_REPORT_FREQUENCY 2 |
| 439 | |
| 440 | int |
| 441 | main(int argc, char **argv) |
| 442 | { |
| 443 | uint32_t devid; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 444 | int counter_format; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 445 | int counter_count; |
| 446 | const char **counter_name; |
| 447 | void (*get_counters)(void); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 448 | int i; |
| 449 | char clear_screen[] = {0x1b, '[', 'H', |
| 450 | 0x1b, '[', 'J', |
| 451 | 0x0}; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 452 | bool oacontrol = true; |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 453 | int fd; |
| 454 | int l; |
| 455 | |
| 456 | fd = drm_open_any(); |
| 457 | devid = intel_get_drm_devid(fd); |
| 458 | |
| 459 | bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); |
| 460 | drm_intel_bufmgr_gem_enable_reuse(bufmgr); |
| 461 | batch = intel_batchbuffer_alloc(bufmgr, devid); |
| 462 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 463 | if (IS_GEN5(devid)) { |
| 464 | counter_name = gen5_counter_names; |
| 465 | counter_count = GEN5_COUNTER_COUNT; |
| 466 | get_counters = gen5_get_counters; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 467 | oacontrol = false; |
| 468 | } else if (IS_GEN6(devid)) { |
| 469 | counter_name = gen6_counter_names; |
| 470 | counter_count = GEN6_COUNTER_COUNT; |
| 471 | counter_format = gen6_counter_format; |
| 472 | get_counters = gen6_get_counters; |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 473 | } else if (IS_GEN7(devid)) { |
| 474 | counter_name = gen7_counter_names; |
| 475 | counter_count = GEN7_COUNTER_COUNT; |
| 476 | counter_format = gen7_counter_format; |
| 477 | get_counters = gen7_get_counters; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 478 | } else { |
| 479 | printf("This tool is not yet supported on your platform.\n"); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 480 | abort(); |
| 481 | } |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 482 | |
| 483 | if (oacontrol) { |
| 484 | /* Forcewake */ |
| 485 | intel_register_access_init(intel_get_pci_device(), 0); |
| 486 | |
| 487 | /* Enable performance counters */ |
| 488 | intel_register_write(OACONTROL, |
| 489 | counter_format << OACONTROL_COUNTER_SELECT_SHIFT | |
| 490 | PERFORMANCE_COUNTER_ENABLE); |
| 491 | } |
| 492 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 493 | totals = calloc(counter_count, sizeof(uint32_t)); |
| 494 | last_counter = calloc(counter_count, sizeof(uint32_t)); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 495 | |
| 496 | for (;;) { |
| 497 | for (l = 0; l < STATS_CHECK_FREQUENCY; l++) { |
| 498 | printf("%s", clear_screen); |
| 499 | |
| 500 | if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) { |
| 501 | if (have_totals) { |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 502 | for (i = 0; i < counter_count; i++) { |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 503 | /* Ignore "Reserved" counters */ |
| 504 | if (!counter_name[i]) |
| 505 | continue; |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 506 | printf("%s: %u\n", counter_name[i], |
| 507 | totals[i]); |
| 508 | totals[i] = 0; |
| 509 | } |
| 510 | } |
| 511 | } |
| 512 | |
| 513 | get_counters(); |
| 514 | have_totals = 1; |
| 515 | |
| 516 | usleep(1000000 / STATS_CHECK_FREQUENCY); |
| 517 | } |
| 518 | } |
| 519 | |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 520 | if (oacontrol) { |
| 521 | /* Disable performance counters */ |
| 522 | intel_register_write(OACONTROL, 0); |
| 523 | |
| 524 | /* Forcewake */ |
| 525 | intel_register_access_fini(); |
| 526 | } |
| 527 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 528 | free(totals); |
| 529 | free(last_counter); |
| 530 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 531 | return 0; |
| 532 | } |