Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2010, 2013 Intel Corporation |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 21 | * DEALINGS IN THE SOFTWARE. |
| 22 | * |
| 23 | * Authors: |
| 24 | * Eric Anholt <eric@anholt.net> |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 25 | * Kenneth Graunke <kenneth@whitecape.org> |
| 26 | * |
| 27 | * While documentation for performance counters is suspiciously missing from the |
| 28 | * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM. |
| 29 | * |
| 30 | * A lot of the Ironlake PRM actually unintentionally documents Sandybridge |
| 31 | * due to mistakes made when updating the documentation for Gen6+. Many of |
| 32 | * these mislabeled sections carried forward to the public documentation. |
| 33 | * |
| 34 | * The Ironlake PRMs have been publicly available since 2010 and are online at: |
| 35 | * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 36 | */ |
| 37 | |
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
#include <signal.h>
#include <sys/ioctl.h>

#include "drm.h"
#include "i915_drm.h"
#include "drmtest.h"
#include "intel_io.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_chipset.h"
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 52 | |
#define GEN5_COUNTER_COUNT 29

/**
 * Display names for the Ironlake (Gen5) counters.
 *
 * Entry i labels the value that gen5_get_counters() accumulates into
 * totals[i].  Some names carry trailing padding; it is part of the
 * printed label and is kept as-is.
 */
const char *gen5_counter_names[GEN5_COUNTER_COUNT] = {
	/* Fixed-function units: starved/stalled pairs. */
	[0]  = "cycles the CS unit is starved",
	[1]  = "cycles the CS unit is stalled",
	[2]  = "cycles the VF unit is starved",
	[3]  = "cycles the VF unit is stalled",
	[4]  = "cycles the VS unit is starved",
	[5]  = "cycles the VS unit is stalled",
	[6]  = "cycles the GS unit is starved",
	[7]  = "cycles the GS unit is stalled",
	[8]  = "cycles the CL unit is starved",
	[9]  = "cycles the CL unit is stalled",
	[10] = "cycles the SF unit is starved",
	[11] = "cycles the SF unit is stalled",
	[12] = "cycles the WZ unit is starved",
	[13] = "cycles the WZ unit is stalled",
	/* Remaining counters. */
	[14] = "Z buffer read/write ",
	[15] = "cycles each EU was active ",
	[16] = "cycles each EU was suspended ",
	[17] = "cycles threads loaded all EUs",
	[18] = "cycles filtering active ",
	[19] = "cycles PS threads executed ",
	[20] = "subspans written to RC ",
	[21] = "bytes read for texture reads ",
	[22] = "texels returned from sampler ",
	[23] = "polygons not culled ",
	[24] = "clocks MASF has valid message",
	[25] = "64b writes/reads from RC ",
	[26] = "reads on dataport ",
	[27] = "clocks MASF has valid msg not consumed by sampler",
	[28] = "cycles any EU is stalled for math",
};
| 86 | |
/* Number of aggregating counters (A0-A28) reported on Sandybridge. */
#define GEN6_COUNTER_COUNT 29

/**
 * Sandybridge: Counter Select = 001
 * A0 A1 A2 A3 A4 TIMESTAMP RPT_ID
 * A5 A6 A7 A8 A9 A10 A11 A12
 * A13 A14 A15 A16 A17 A18 A19 A20
 * A21 A22 A23 A24 A25 A26 A27 A28
 *
 * This value is shifted into the counter-select field of OACONTROL by
 * main().  gen6_get_counters() uses a lookup table to translate counter
 * numbers into dword positions within this report.
 */
const int gen6_counter_format = 1;

/**
 * Names for aggregating counters A0-A28.
 *
 * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28),
 * it only lists the names for 28 of them; one is missing. However, careful
 * examination reveals a pattern: there are five GS counters (Active, Stall,
 * Core Stall, # threads loaded, and ready but not running time). There are
 * also five PS counters, in the same order. But there are only four VS
 * counters listed - the number of VS threads loaded is missing. Presumably,
 * it exists and is counter 5, and the rest are shifted over one place.
 *
 * The array index is the counter number: entry [n] names counter An.
 */
const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
	[0] = "Aggregated Core Array Active",
	[1] = "Aggregated Core Array Stalled",
	[2] = "Vertex Shader Active Time",
	[3] = "Vertex Shader Stall Time",
	[4] = "Vertex Shader Stall Time - Core Stall",
	/* Name inferred from the GS/PS pattern; see the note above. */
	[5] = "# VS threads loaded",
	[6] = "Vertex Shader Ready but not running time",
	[7] = "Geometry Shader Active Time",
	[8] = "Geometry Shader Stall Time",
	[9] = "Geometry Shader Stall Time - Core Stall",
	[10] = "# GS threads loaded",
	[11] = "Geometry Shader ready but not running Time",
	[12] = "Pixel Shader Active Time",
	[13] = "Pixel Shader Stall Time",
	[14] = "Pixel Shader Stall Time - Core Stall",
	[15] = "# PS threads loaded",
	[16] = "Pixel Shader ready but not running Time",
	[17] = "Early Z Test Pixels Passing",
	[18] = "Early Z Test Pixels Failing",
	[19] = "Early Stencil Test Pixels Passing",
	[20] = "Early Stencil Test Pixels Failing",
	[21] = "Pixel Kill Count",
	[22] = "Alpha Test Pixels Failed",
	[23] = "Post PS Stencil Pixels Failed",
	[24] = "Post PS Z buffer Pixels Failed",
	[25] = "Pixels/samples Written in the frame buffer",
	[26] = "GPU Busy",
	[27] = "CL active and not stalled",
	[28] = "SF active and stalled",
};
| 140 | |
/* Number of entries in gen7_counter_names[] (counters A0-A43). */
#define GEN7_COUNTER_COUNT 44

/**
 * Names for aggregating counters A0-A43. Uninitialized fields are "Reserved."
 *
 * The array index is the counter number.  Entries without an initializer
 * are NULL; gen7_get_counters() and the report loop in main() skip them.
 * A44, which appears in the report layout documented further down, has no
 * entry in this table.
 */
const char *gen7_counter_names[GEN7_COUNTER_COUNT] = {
	/* A0:
	 * The sum of all cycles on all cores actively executing instructions
	 * This does not count the time taken to service Send instructions.
	 * This time is considered by shader active counters to give the result.
	 */
	[0] = "Aggregated Core Array Active",
	/* A1:
	 * The sum of all cycles on all cores where the EU is not idle and is
	 * not actively executing ISA instructions. Generally this means that
	 * all loaded threads on the EU are stalled on some data dependency,
	 * but this also includes the time during which the TS is loading the
	 * thread dispatch header into the EU prior to thread execution and no
	 * other thread is fully loaded.
	 */
	[1] = "Aggregated Core Array Stalled",
	/* A2:
	 * Total time in clocks the vertex shader spent active on all cores.
	 */
	[2] = "Vertex Shader Active Time",
	/* A4:
	 * Total time in clocks the vertex shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[4] = "Vertex Shader Stall Time - Core Stall",
	/* A5: Number of VS threads loaded at any given time in the EUs. */
	[5] = "# VS threads loaded",
	/* A7:
	 * Total time in clocks the Hull shader spent active on all cores.
	 */
	[7] = "Hull Shader Active Time",
	/* A9:
	 * Total time in clocks the Hull shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[9] = "Hull Shader Stall Time - Core Stall",
	/* A10: Number of HS threads loaded at any given time in the EUs. */
	[10] = "# HS threads loaded",
	/* A12:
	 * Total time in clocks the Domain shader spent active on all cores.
	 */
	[12] = "Domain Shader Active Time",
	/* A14:
	 * Total time in clocks the domain shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[14] = "Domain Shader Stall Time - Core Stall",
	/* A15: Number of DS threads loaded at any given time in the EUs. */
	[15] = "# DS threads loaded",
	/* A17:
	 * Total time in clocks the compute shader spent active on all cores.
	 */
	[17] = "Compute Shader Active Time",
	/* A19:
	 * Total time in clocks the compute shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[19] = "Compute Shader Stall Time - Core Stall",
	/* A20: Number of CS threads loaded at any given time in the EUs. */
	[20] = "# CS threads loaded",
	/* A22:
	 * Total time in clocks the geometry shader spent active on all cores.
	 */
	[22] = "Geometry Shader Active Time",
	/* A24:
	 * Total time in clocks the geometry shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[24] = "Geometry Shader Stall Time - Core Stall",
	/* A25: Number of GS threads loaded at any time in the EUs. */
	[25] = "# GS threads loaded",
	/* A27:
	 * Total time in clocks the pixel shader spent active on all cores.
	 */
	[27] = "Pixel Shader Active Time",
	/* A29:
	 * Total time in clocks the pixel shader spent stalled on all cores -
	 * and the entire core was stalled as well.
	 */
	[29] = "Pixel Shader Stall Time - Core Stall",
	/* A30: Number of PS threads loaded at any given time in the EUs. */
	[30] = "# PS threads loaded",
	/* A32: Count of pixels that pass the fast check (8x8). */
	[32] = "HiZ Fast Z Test Pixels Passing",
	/* A33: Count of pixels that fail the fast check (8x8). */
	[33] = "HiZ Fast Z Test Pixels Failing",
	/* A34: Count of pixels passing the slow check (2x2). */
	[34] = "Slow Z Test Pixels Passing",
	/* A35: Count of pixels that fail the slow check (2x2). */
	[35] = "Slow Z Test Pixels Failing",
	/* A36: Number of pixels/samples killed in the pixel shader.
	 * Ivybridge/Baytrail Erratum: Count reported is 2X the actual count for
	 * dual source render target messages i.e. when PS has two output colors.
	 */
	[36] = "Pixel Kill Count",
	/* A37:
	 * Number of pixels/samples that fail alpha-test. Alpha to coverage
	 * may have some challenges in per-pixel invocation.
	 */
	[37] = "Alpha Test Pixels Failed",
	/* A38:
	 * Number of pixels/samples failing stencil test after the pixel shader
	 * has executed.
	 */
	[38] = "Post PS Stencil Pixels Failed",
	/* A39:
	 * Number of pixels/samples that fail the Z test after the pixel shader
	 * has executed.
	 */
	[39] = "Post PS Z buffer Pixels Failed",
	/* A40:
	 * Number of render target writes. MRT scenarios will cause this
	 * counter to increment multiple times.
	 */
	[40] = "3D/GPGPU Render Target Writes",
	/* A41: Render engine is not idle.
	 *
	 * GPU Busy aggregate counter doesn't increment under the following
	 * conditions:
	 *
	 * 1. Context Switch in Progress.
	 * 2. GPU stalled on executing MI_WAIT_FOR_EVENT.
	 * 3. GPU stalled on executing MI_SEMAPHORE_MBOX.
	 * 4. RCS idle but other parts of GPU active (e.g. only media engines
	 *    active)
	 */
	[41] = "Render Engine Busy",
	/* A42:
	 * VSunit is stalling VF (upstream unit) and starving HS (downstream
	 * unit).
	 */
	[42] = "VS bottleneck",
	/* A43:
	 * GSunit is stalling DS (upstream unit) and starving SOL (downstream
	 * unit).
	 */
	[43] = "GS bottleneck",
};

/**
 * Ivybridge - Counter Select = 101
 * A4 A3 A2 A1 A0 TIMESTAMP ReportID
 * A12 A11 A10 A9 A8 A7 A6 A5
 * A20 A19 A18 A17 A16 A15 A14 A13
 * A28 A27 A26 A25 A24 A23 A22 A21
 * A36 A35 A34 A33 A32 A31 A30 A29
 * A44 A43 A42 A41 A40 A39 A38 A37
 * C3 C2 C1 C0 B3 B2 B1 B0
 * C11 C10 C9 C8 C7 C6 C5 C4
 *
 * This value is shifted into the counter-select field of OACONTROL by
 * main().
 */
const int gen7_counter_format = 5; /* 0b101 */
| 297 | |
/* Set to 1 by main() after the first sample; reports are printed only then. */
int have_totals = 0;
/*
 * Per-counter deltas accumulated by the *_get_counters() functions.
 * Allocated in main(); each entry is reset to 0 after it is printed.
 */
uint32_t *totals;
/* Raw counter values from the previous sample, used to compute the deltas. */
uint32_t *last_counter;
/* Buffer manager and batchbuffer, created in main(), used for every sample. */
static drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;

/* DW0 */
/* NOTE(review): (3 - 2) is presumably the command-length field - confirm. */
#define GEN5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2))
#define MI_COUNTER_SET_0 (0 << 6)
#define MI_COUNTER_SET_1 (1 << 6)
/* DW1 */
#define MI_COUNTER_ADDRESS_GTT (1 << 0)
/* DW2: report ID */

/**
 * According to the Sandybridge PRM, Volume 1, Part 1, page 48,
 * MI_REPORT_PERF_COUNT is now opcode 0x28. The Ironlake PRM, Volume 1,
 * Part 3 details how it works.
 */
/* DW0 */
/* Unlike the Gen5 macro, callers OR in the (3 - 2) value themselves. */
#define GEN6_MI_REPORT_PERF_COUNT (0x28 << 23)
/* DW1 and 2 are the same as above */

/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM */
#define OACONTROL 0x2360
/* main() shifts the per-generation counter format by this amount. */
# define OACONTROL_COUNTER_SELECT_SHIFT 2
# define PERFORMANCE_COUNTER_ENABLE (1 << 0)
| 325 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 326 | static void |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 327 | gen5_get_counters(void) |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 328 | { |
| 329 | int i; |
| 330 | drm_intel_bo *stats_bo; |
| 331 | uint32_t *stats_result; |
| 332 | |
| 333 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 334 | |
Chris Wilson | 10552b5 | 2014-08-30 11:44:51 +0100 | [diff] [blame] | 335 | BEGIN_BATCH(6, 2); |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 336 | OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | MI_COUNTER_SET_0); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 337 | OUT_RELOC(stats_bo, |
| 338 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 339 | 0); |
| 340 | OUT_BATCH(0); |
| 341 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 342 | OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | MI_COUNTER_SET_1); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 343 | OUT_RELOC(stats_bo, |
| 344 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 345 | 64); |
| 346 | OUT_BATCH(0); |
| 347 | |
| 348 | ADVANCE_BATCH(); |
| 349 | |
| 350 | intel_batchbuffer_flush(batch); |
| 351 | |
| 352 | drm_intel_bo_map(stats_bo, 0); |
| 353 | stats_result = stats_bo->virtual; |
| 354 | /* skip REPORT_ID, TIMESTAMP */ |
| 355 | stats_result += 3; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 356 | for (i = 0 ; i < GEN5_COUNTER_COUNT; i++) { |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 357 | totals[i] += stats_result[i] - last_counter[i]; |
| 358 | last_counter[i] = stats_result[i]; |
| 359 | } |
| 360 | |
| 361 | drm_intel_bo_unmap(stats_bo); |
| 362 | drm_intel_bo_unreference(stats_bo); |
| 363 | } |
| 364 | |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 365 | static void |
| 366 | gen6_get_counters(void) |
| 367 | { |
| 368 | int i; |
| 369 | drm_intel_bo *stats_bo; |
| 370 | uint32_t *stats_result; |
| 371 | |
| 372 | /* Map from counter names to their index in the buffer object */ |
| 373 | static const int buffer_index[GEN6_COUNTER_COUNT] = |
| 374 | { |
| 375 | 7, 6, 5, 4, 3, |
| 376 | 15, 14, 13, 12, 11, 10, 9, 8, |
| 377 | 23, 22, 21, 20, 19, 18, 17, 16, |
| 378 | 31, 30, 29, 28, 27, 26, 25, 24, |
| 379 | }; |
| 380 | |
| 381 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 382 | |
Chris Wilson | 10552b5 | 2014-08-30 11:44:51 +0100 | [diff] [blame] | 383 | BEGIN_BATCH(3, 1); |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 384 | OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); |
| 385 | OUT_RELOC(stats_bo, |
| 386 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, |
| 387 | MI_COUNTER_ADDRESS_GTT); |
| 388 | OUT_BATCH(0); |
| 389 | ADVANCE_BATCH(); |
| 390 | |
| 391 | intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); |
| 392 | |
| 393 | drm_intel_bo_map(stats_bo, 0); |
| 394 | stats_result = stats_bo->virtual; |
| 395 | for (i = 0; i < GEN6_COUNTER_COUNT; i++) { |
| 396 | totals[i] += stats_result[buffer_index[i]] - last_counter[i]; |
| 397 | last_counter[i] = stats_result[buffer_index[i]]; |
| 398 | } |
| 399 | |
| 400 | drm_intel_bo_unmap(stats_bo); |
| 401 | drm_intel_bo_unreference(stats_bo); |
| 402 | } |
| 403 | |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 404 | static void |
| 405 | gen7_get_counters(void) |
| 406 | { |
| 407 | int i; |
| 408 | drm_intel_bo *stats_bo; |
| 409 | uint32_t *stats_result; |
| 410 | |
| 411 | stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); |
| 412 | |
Chris Wilson | 10552b5 | 2014-08-30 11:44:51 +0100 | [diff] [blame] | 413 | BEGIN_BATCH(3, 1); |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 414 | OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); |
| 415 | OUT_RELOC(stats_bo, |
| 416 | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); |
| 417 | OUT_BATCH(0); |
| 418 | ADVANCE_BATCH(); |
| 419 | |
| 420 | intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); |
| 421 | |
| 422 | drm_intel_bo_map(stats_bo, 0); |
| 423 | stats_result = stats_bo->virtual; |
| 424 | /* skip REPORT_ID, TIMESTAMP */ |
| 425 | stats_result += 3; |
| 426 | for (i = 0; i < GEN7_COUNTER_COUNT; i++) { |
| 427 | /* Ignore "Reserved" counters */ |
| 428 | if (!gen7_counter_names[i]) |
| 429 | continue; |
| 430 | totals[i] += stats_result[i] - last_counter[i]; |
| 431 | last_counter[i] = stats_result[i]; |
| 432 | } |
| 433 | |
| 434 | drm_intel_bo_unmap(stats_bo); |
| 435 | drm_intel_bo_unreference(stats_bo); |
| 436 | } |
| 437 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 438 | #define STATS_CHECK_FREQUENCY 100 |
| 439 | #define STATS_REPORT_FREQUENCY 2 |
| 440 | |
| 441 | int |
| 442 | main(int argc, char **argv) |
| 443 | { |
| 444 | uint32_t devid; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 445 | int counter_format; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 446 | int counter_count; |
| 447 | const char **counter_name; |
| 448 | void (*get_counters)(void); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 449 | int i; |
| 450 | char clear_screen[] = {0x1b, '[', 'H', |
| 451 | 0x1b, '[', 'J', |
| 452 | 0x0}; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 453 | bool oacontrol = true; |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 454 | int fd; |
| 455 | int l; |
| 456 | |
Micah Fedke | c81d293 | 2015-07-22 21:54:02 +0000 | [diff] [blame] | 457 | fd = drm_open_driver(DRIVER_INTEL); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 458 | devid = intel_get_drm_devid(fd); |
| 459 | |
| 460 | bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); |
| 461 | drm_intel_bufmgr_gem_enable_reuse(bufmgr); |
| 462 | batch = intel_batchbuffer_alloc(bufmgr, devid); |
| 463 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 464 | if (IS_GEN5(devid)) { |
| 465 | counter_name = gen5_counter_names; |
| 466 | counter_count = GEN5_COUNTER_COUNT; |
| 467 | get_counters = gen5_get_counters; |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 468 | oacontrol = false; |
| 469 | } else if (IS_GEN6(devid)) { |
| 470 | counter_name = gen6_counter_names; |
| 471 | counter_count = GEN6_COUNTER_COUNT; |
| 472 | counter_format = gen6_counter_format; |
| 473 | get_counters = gen6_get_counters; |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 474 | } else if (IS_GEN7(devid)) { |
| 475 | counter_name = gen7_counter_names; |
| 476 | counter_count = GEN7_COUNTER_COUNT; |
| 477 | counter_format = gen7_counter_format; |
| 478 | get_counters = gen7_get_counters; |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 479 | } else { |
| 480 | printf("This tool is not yet supported on your platform.\n"); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 481 | abort(); |
| 482 | } |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 483 | |
| 484 | if (oacontrol) { |
| 485 | /* Forcewake */ |
Ville Syrjälä | e408d56 | 2019-03-27 20:52:52 +0200 | [diff] [blame] | 486 | intel_register_access_init(intel_get_pci_device(), 0, fd); |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 487 | |
| 488 | /* Enable performance counters */ |
| 489 | intel_register_write(OACONTROL, |
| 490 | counter_format << OACONTROL_COUNTER_SELECT_SHIFT | |
| 491 | PERFORMANCE_COUNTER_ENABLE); |
| 492 | } |
| 493 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 494 | totals = calloc(counter_count, sizeof(uint32_t)); |
| 495 | last_counter = calloc(counter_count, sizeof(uint32_t)); |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 496 | |
| 497 | for (;;) { |
| 498 | for (l = 0; l < STATS_CHECK_FREQUENCY; l++) { |
| 499 | printf("%s", clear_screen); |
| 500 | |
| 501 | if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) { |
| 502 | if (have_totals) { |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 503 | for (i = 0; i < counter_count; i++) { |
Kenneth Graunke | 11d5859 | 2013-04-02 22:54:08 -0700 | [diff] [blame] | 504 | /* Ignore "Reserved" counters */ |
| 505 | if (!counter_name[i]) |
| 506 | continue; |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 507 | printf("%s: %u\n", counter_name[i], |
| 508 | totals[i]); |
| 509 | totals[i] = 0; |
| 510 | } |
| 511 | } |
| 512 | } |
| 513 | |
| 514 | get_counters(); |
| 515 | have_totals = 1; |
| 516 | |
| 517 | usleep(1000000 / STATS_CHECK_FREQUENCY); |
| 518 | } |
| 519 | } |
| 520 | |
Kenneth Graunke | 43a0862 | 2013-03-26 22:06:39 -0700 | [diff] [blame] | 521 | if (oacontrol) { |
| 522 | /* Disable performance counters */ |
| 523 | intel_register_write(OACONTROL, 0); |
| 524 | |
| 525 | /* Forcewake */ |
| 526 | intel_register_access_fini(); |
| 527 | } |
| 528 | |
Kenneth Graunke | 0811556 | 2013-03-26 22:06:38 -0700 | [diff] [blame] | 529 | free(totals); |
| 530 | free(last_counter); |
| 531 | |
Eric Anholt | 85667f4 | 2013-03-26 22:06:37 -0700 | [diff] [blame] | 532 | return 0; |
| 533 | } |