blob: b5c158e0e6118ebe9c6b338290eb25aab1fea6cb [file] [log] [blame]
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
#include <sys/ioctl.h>
#include "intel_gpu_tools.h"
#include "instdone.h"
/* Number of register samples main() takes per reporting interval; the
 * sampling loop sleeps 1000000 / SAMPLES_PER_SEC microseconds between
 * samples, so an interval is nominally one second. */
#define SAMPLES_PER_SEC 10000
/* Divisor that turns a per-interval sample count into a percentage. */
#define SAMPLES_TO_PERCENT_RATIO (SAMPLES_PER_SEC / 100)
/* Capacity of the top_bits and top_bits_sorted arrays. */
#define MAX_NUM_TOP_BITS 100
/* Per-interval counter for one INSTDONE bit. */
struct top_bit {
/* Descriptor of the bit being tracked (points into instdone_bits[]). */
struct instdone_bit *bit;
/* Number of samples in the current interval in which the bit read as 0. */
int count;
} top_bits[MAX_NUM_TOP_BITS];
/* Pointers into top_bits; main() qsort()s this array by descending count. */
struct top_bit *top_bits_sorted[MAX_NUM_TOP_BITS];
/* Most recent register samples, written by main() and read by
 * update_idle_bit(). */
static uint32_t instdone, instdone1;
/* UTF-8 glyphs for a partially filled character cell, indexed by the number
 * of eighths filled (0 = empty, 8 = full block). Used by
 * print_percentage_bar(). */
static const char *bars[] = {
" ",
"▏",
"▎",
"▍",
"▌",
"▋",
"▊",
"▉",
"█"
};
/* Indices into the stats_regs, stats_reg_names, stats and last_stats arrays.
 * STATS_COUNT is the number of entries and must stay last. */
enum stats_counts {
IA_VERTICES,
IA_PRIMITIVES,
VS_INVOCATION,
GS_INVOCATION,
GS_PRIMITIVES,
CL_INVOCATION,
CL_PRIMITIVES,
PS_INVOCATION,
PS_DEPTH,
STATS_COUNT
};
/* Register offset of each statistics counter, in enum stats_counts order.
 * main() reads the low 32 bits at the offset and the high 32 bits at
 * offset + 4. */
const uint32_t stats_regs[STATS_COUNT] = {
IA_VERTICES_COUNT_QW,
IA_PRIMITIVES_COUNT_QW,
VS_INVOCATION_COUNT_QW,
GS_INVOCATION_COUNT_QW,
GS_PRIMITIVES_COUNT_QW,
CL_INVOCATION_COUNT_QW,
CL_PRIMITIVES_COUNT_QW,
PS_INVOCATION_COUNT_QW,
PS_DEPTH_COUNT_QW,
};
/* Display label for each statistics counter, in enum stats_counts order. */
const char *stats_reg_names[STATS_COUNT] = {
"vert fetch",
"prim fetch",
"VS invocations",
"GS invocations",
"GS prims",
"CL invocations",
"CL prims",
"PS invocations",
"PS depth pass",
};
/* Counter values read at the end of the current interval. */
uint64_t stats[STATS_COUNT];
/* Counter values from the previous interval; main() prints
 * stats[i] - last_stats[i] as the per-interval rate. */
uint64_t last_stats[STATS_COUNT];
static int
top_bits_sort(const void *a, const void *b)
{
struct top_bit * const *bit_a = a;
struct top_bit * const *bit_b = b;
int a_count = (*bit_a)->count;
int b_count = (*bit_b)->count;
if (a_count < b_count)
return 1;
else if (a_count == b_count)
return 0;
else
return -1;
}
/*
 * Account one sample for the given tracked bit.
 *
 * Picks the sampled register value that matches the bit's register
 * (instdone1 for INST_DONE_1, instdone otherwise) and increments the
 * counter when the bit is clear in that sample.
 */
static void
update_idle_bit(struct top_bit *top_bit)
{
	uint32_t sampled;

	sampled = (top_bit->bit->reg == INST_DONE_1) ? instdone1 : instdone;

	if (!(sampled & top_bit->bit->bit))
		top_bit->count++;
}
/*
 * Print "<name> clock: <clock> Mhz" to stdout, or "<name> clock: unknown"
 * when clock is -1. No trailing newline is written.
 */
static void
print_clock(char *name, int clock)
{
	if (clock != -1) {
		printf("%s clock: %d Mhz", name, clock);
		return;
	}

	printf("%s clock: unknown", name);
}
/*
 * Decode the GCFGC PCI config register of a mobile chipset and print the
 * clock frequencies it encodes, followed by a newline.
 *
 * Handles GM45, mobile 965, mobile 945 and mobile 915; for any other device
 * only the newline is printed. Encodings that are not recognised are
 * reported as "unknown" by print_clock().
 *
 * Returns -1 unconditionally (the caller in this file ignores the value).
 */
static int
print_clock_info(struct pci_device *pci_dev)
{
	uint32_t devid = pci_dev->device_id;
	uint16_t gcfgc;

	if (IS_GM45(devid)) {
		int core_clock = -1;

		pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);

		switch (gcfgc & 0xf) {
		case 8:
			core_clock = 266;
			break;
		case 9:
			core_clock = 320;
			break;
		case 11:
			core_clock = 400;
			break;
		case 13:
			core_clock = 533;
			break;
		}
		print_clock("core", core_clock);
	} else if (IS_965(devid) && IS_MOBILE(devid)) {
		int render_clock = -1, sampler_clock = -1;

		pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);

		switch (gcfgc & 0xf) {
		case 2:
			render_clock = 250; sampler_clock = 267;
			break;
		case 3:
			render_clock = 320; sampler_clock = 333;
			break;
		case 4:
			render_clock = 400; sampler_clock = 444;
			break;
		case 5:
			render_clock = 500; sampler_clock = 533;
			break;
		}

		print_clock("render", render_clock);
		printf(" ");
		print_clock("sampler", sampler_clock);
	} else if (IS_945(devid) && IS_MOBILE(devid)) {
		int render_clock = -1, display_clock = -1;

		pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);

		switch (gcfgc & 0x7) {
		case 0:
			render_clock = 166;
			break;
		case 1:
			render_clock = 200;
			break;
		case 3:
			render_clock = 250;
			break;
		case 5:
			render_clock = 400;
			break;
		}

		/* The display clock field lives in bits 6:4, so shift it down
		 * before comparing; without the shift the value is always a
		 * multiple of 0x10 and "case 4" can never match. */
		switch ((gcfgc & 0x70) >> 4) {
		case 0:
			display_clock = 200;
			break;
		case 4:
			display_clock = 320;
			break;
		}
		/* Bit 7 overrides the decoded display clock. */
		if (gcfgc & (1 << 7))
			display_clock = 133;

		print_clock("render", render_clock);
		printf(" ");
		print_clock("display", display_clock);
	} else if (IS_915(devid) && IS_MOBILE(devid)) {
		int render_clock = -1, display_clock = -1;

		pci_device_cfg_read_u16(pci_dev, &gcfgc, I915_GCFGC);

		switch (gcfgc & 0x7) {
		case 0:
			render_clock = 160;
			break;
		case 1:
			render_clock = 190;
			break;
		case 4:
			render_clock = 333;
			break;
		}
		/* Bit 13 overrides the decoded render clock. */
		if (gcfgc & (1 << 13))
			render_clock = 133;

		/* Same bits 6:4 field as on 945: shift before comparing. */
		switch ((gcfgc & 0x70) >> 4) {
		case 0:
			display_clock = 190;
			break;
		case 4:
			display_clock = 333;
			break;
		}
		/* Bit 7 overrides the decoded display clock. */
		if (gcfgc & (1 << 7))
			display_clock = 133;

		print_clock("render", render_clock);
		printf(" ");
		print_clock("display", display_clock);
	}

	printf("\n");
	return -1;
}
/* Subtracted from a 79-column line to leave room to the right of the bar
 * (presumably for the statistics text that main() prints after the bar). */
#define STATS_LEN (20)
/* Column at which the percentage-bar field ends; print_percentage_bar()
 * pads each line out to this column. */
#define PERCENTAGE_BAR_END (79 - STATS_LEN)
/*
 * Draw a horizontal bar for `percent` and pad the line with spaces up to
 * column PERCENTAGE_BAR_END.
 *
 * percent:      value to visualise, expected in the range 0..100.
 * cur_line_len: number of columns already printed on the current line.
 *
 * The bar is drawn in eighths of a character cell using the glyphs in
 * bars[]. The bar length is clamped to the space available so that an
 * over-long prefix or an out-of-range percentage can neither index bars[]
 * with a negative value nor overrun the bar field.
 */
static void
print_percentage_bar(float percent, int cur_line_len)
{
	int bar_avail_len = (PERCENTAGE_BAR_END - cur_line_len - 1) * 8;
	int bar_len;
	int pad;
	int i;

	/* The prefix may already extend past the bar field. */
	if (bar_avail_len < 0)
		bar_avail_len = 0;

	bar_len = bar_avail_len * (percent + .5) / 100.0;
	if (bar_len < 0)
		bar_len = 0;
	else if (bar_len > bar_avail_len)
		bar_len = bar_avail_len;

	/* Full cells first... */
	for (i = bar_len; i >= 8; i -= 8) {
		printf("%s", bars[8]);
		cur_line_len++;
	}
	/* ...then one partially filled cell for the remaining eighths. */
	if (i) {
		printf("%s", bars[i]);
		cur_line_len++;
	}

	/* NB: We can't use a field width with utf8 so we manually pad the
	 * line out to PERCENTAGE_BAR_END columns. A negative "%*s" width
	 * would be taken as left-justification with a positive width, so
	 * only pad when there is room left. */
	pad = PERCENTAGE_BAR_END - cur_line_len;
	if (pad > 0)
		printf("%*s", pad, "");
}
/* Sampling state for one command ring buffer. */
struct ring {
/* Label printed by ring_print(). */
const char *name;
/* Base register offset; RING_LEN, RING_HEAD and RING_TAIL are added to it. */
uint32_t mmio;
/* head/tail: values from the most recent ring_sample().
 * size: ring size in bytes as computed by ring_init(); a ring whose size
 * is 0 is skipped by ring_sample() and ring_print(). */
uint32_t head, tail, size;
/* Sum over the interval's samples of the bytes between head and tail. */
uint64_t full;
/* Number of samples in the interval where head equalled tail. */
int idle;
};
/*
 * Read the ring's length register and record the ring size in bytes.
 *
 * The RING_NR_PAGES field holds the buffer length in 4 KiB pages minus one
 * (a field value of 0 means one page), so one page is added back before
 * converting to bytes. Rings that are never passed to this function keep
 * a size of 0 and are skipped by ring_sample() and ring_print().
 */
static void ring_init(struct ring *ring)
{
	uint32_t pages_minus_one;

	pages_minus_one = (INREG(ring->mmio + RING_LEN) & RING_NR_PAGES) >> 12;
	ring->size = (pages_minus_one + 1) * 4096;
}
/* Clear the per-interval accumulators (idle sample count and fill sum). */
static void ring_reset(struct ring *ring)
{
	ring->full = 0;
	ring->idle = 0;
}
/*
 * Take one sample of the ring's head and tail registers.
 *
 * Does nothing for a ring whose size is 0. Otherwise it stores the masked
 * head and tail offsets, counts the sample as idle when they are equal,
 * and adds the number of bytes between head and tail (wrapping around the
 * ring size when tail is behind head) to the running fill total.
 */
static void ring_sample(struct ring *ring)
{
	int occupied;

	if (ring->size == 0)
		return;

	ring->head = INREG(ring->mmio + RING_HEAD) & HEAD_ADDR;
	ring->tail = INREG(ring->mmio + RING_TAIL) & TAIL_ADDR;

	if (ring->head == ring->tail)
		ring->idle++;

	occupied = ring->tail - ring->head;
	if (occupied < 0)
		occupied += ring->size;

	ring->full += occupied;
}
/*
 * Print the interval summary for one ring: a "busy" percentage line with a
 * bar, followed by a "space" line showing the average number of bytes
 * between head and tail, the ring size, and that average as a percentage
 * of the ring size. Rings whose size is 0 are not printed.
 */
static void ring_print(struct ring *ring)
{
	int busy_pct, prefix_len;
	int avg_bytes, avg_pct;

	if (ring->size == 0)
		return;

	/* Busy is whatever share of the samples was not idle. */
	busy_pct = 100 - ring->idle / SAMPLES_TO_PERCENT_RATIO;

	prefix_len = printf("%25s busy: %3d%%: ", ring->name, busy_pct);
	print_percentage_bar (busy_pct, prefix_len);

	avg_bytes = (int)(ring->full / SAMPLES_PER_SEC);
	avg_pct = (int)((ring->full / SAMPLES_TO_PERCENT_RATIO) / ring->size);

	printf("%24s space: %d/%d (%d%%)\n",
	       ring->name,
	       avg_bytes,
	       ring->size,
	       avg_pct);
}
int main(int argc, char **argv)
{
struct pci_device *pci_dev;
struct ring render_ring = {
.name = "render",
.mmio = 0x2030,
}, bsd_ring = {
.name = "bitstream",
.mmio = 0x4030,
}, bsd6_ring = {
.name = "bitstream",
.mmio = 0x12030,
};
uint32_t devid;
int i;
pci_dev = intel_get_pci_device();
devid = pci_dev->device_id;
intel_get_mmio(pci_dev);
init_instdone_definitions(devid);
for (i = 0; i < num_instdone_bits; i++) {
top_bits[i].bit = &instdone_bits[i];
top_bits[i].count = 0;
top_bits_sorted[i] = &top_bits[i];
}
ring_init(&render_ring);
if (IS_GEN4(devid) || IS_IRONLAKE(devid))
ring_init(&bsd_ring);
if (IS_GEN6(devid))
ring_init(&bsd6_ring);
for (;;) {
int j;
char clear_screen[] = {0x1b, '[', 'H',
0x1b, '[', 'J',
0x0};
int percent;
int len;
ring_reset(&render_ring);
ring_reset(&bsd_ring);
ring_reset(&bsd6_ring);
for (i = 0; i < SAMPLES_PER_SEC; i++) {
if (IS_965(devid)) {
instdone = INREG(INST_DONE_I965);
instdone1 = INREG(INST_DONE_1);
} else
instdone = INREG(INST_DONE);
for (j = 0; j < num_instdone_bits; j++)
update_idle_bit(&top_bits[j]);
ring_sample(&render_ring);
ring_sample(&bsd_ring);
ring_sample(&bsd6_ring);
usleep(1000000 / SAMPLES_PER_SEC);
}
if (IS_GEN4(devid) || IS_GEN6(devid)) {
for (i = 0; i < STATS_COUNT; i++) {
uint32_t stats_high, stats_low, stats_high_2;
do {
stats_high = INREG(stats_regs[i] + 4);
stats_low = INREG(stats_regs[i]);
stats_high_2 = INREG(stats_regs[i] + 4);
} while (stats_high != stats_high_2);
stats[i] = (uint64_t)stats_high << 32 |
stats_low;
}
}
qsort(top_bits_sorted, num_instdone_bits,
sizeof(struct top_bit *), top_bits_sort);
/* Limit the number of lines printed to the terminal height so the
* most important info (at the top) will stay on screen. */
unsigned short int max_lines = -1;
struct winsize ws;
if (ioctl(0, TIOCGWINSZ, &ws) != -1)
max_lines = ws.ws_row - 6; /* exclude header lines */
if (max_lines >= num_instdone_bits)
max_lines = num_instdone_bits;
printf("%s", clear_screen);
print_clock_info(pci_dev);
ring_print(&render_ring);
ring_print(&bsd_ring);
ring_print(&bsd6_ring);
printf("\n%30s %s\n", "task", "percent busy");
for (i = 0; i < max_lines; i++) {
if (top_bits_sorted[i]->count > 0) {
percent = top_bits_sorted[i]->count /
SAMPLES_TO_PERCENT_RATIO;
len = printf("%30s: %3d%%: ",
top_bits_sorted[i]->bit->name,
percent);
print_percentage_bar (percent, len);
} else {
printf("%*s", PERCENTAGE_BAR_END, "");
}
if (i < STATS_COUNT &&
(IS_GEN4(devid) || IS_GEN6(devid))) {
printf("%13s: %llu (%lld/sec)",
stats_reg_names[i],
stats[i],
stats[i] - last_stats[i]);
last_stats[i] = stats[i];
} else {
if (!top_bits_sorted[i]->count)
break;
}
printf("\n");
}
for (i = 0; i < num_instdone_bits; i++) {
top_bits_sorted[i]->count = 0;
if (i < STATS_COUNT)
last_stats[i] = stats[i];
}
}
return 0;
}