/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24
25#include <stdlib.h>
26#include <stdio.h>
27#include <string.h>
28#include <fcntl.h>
29#include <inttypes.h>
30#include <errno.h>
31#include <sys/stat.h>
32#include <sys/time.h>
33#include <sys/times.h>
34#include <sys/types.h>
35#include <dirent.h>
36#include <time.h>
37#include <poll.h>
38#include <math.h>
39
40#include "igt.h"
41#include "drm.h"
42
43IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
44
45#define GEN6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2))
46
47#define GFX_OP_PIPE_CONTROL ((3 << 29) | (3 << 27) | (2 << 24))
48#define PIPE_CONTROL_CS_STALL (1 << 20)
49#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
50#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
51#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
52#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
53#define PIPE_CONTROL_NO_WRITE (0 << 14)
54#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
55#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
56#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
57#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
58#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
59#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
60#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
61#define PIPE_CONTROL_ISP_DIS (1 << 9)
62#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
63#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
64/* GT */
65#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
66#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
67#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
68#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
69#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
70#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
71#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
72#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
73
Robert Braggbb7ea5b2016-02-09 19:15:45 +000074/* Temporarily copy i915-perf uapi here to avoid a dependency on libdrm's
75 * i915_drm.h copy being updated with the i915-perf interface before this
76 * test can land in i-g-t.
77 *
78 * TODO: remove this once the interface lands in libdrm
79 */
80#ifndef DRM_I915_PERF_OPEN
81#define DRM_I915_PERF_OPEN 0x36
82#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
83
84enum drm_i915_oa_format {
85 I915_OA_FORMAT_A13 = 1,
86 I915_OA_FORMAT_A29,
87 I915_OA_FORMAT_A13_B8_C8,
88 I915_OA_FORMAT_B4_C8,
89 I915_OA_FORMAT_A45_B8_C8,
90 I915_OA_FORMAT_B4_C8_A16,
91 I915_OA_FORMAT_C4_B8,
92
93 I915_OA_FORMAT_MAX /* non-ABI */
94};
95
96enum drm_i915_perf_property_id {
97 DRM_I915_PERF_PROP_CTX_HANDLE = 1,
98 DRM_I915_PERF_PROP_SAMPLE_OA,
99 DRM_I915_PERF_PROP_OA_METRICS_SET,
100 DRM_I915_PERF_PROP_OA_FORMAT,
101 DRM_I915_PERF_PROP_OA_EXPONENT,
102
103 DRM_I915_PERF_PROP_MAX /* non-ABI */
104};
105
106struct drm_i915_perf_open_param {
107 __u32 flags;
108#define I915_PERF_FLAG_FD_CLOEXEC (1<<0)
109#define I915_PERF_FLAG_FD_NONBLOCK (1<<1)
110#define I915_PERF_FLAG_DISABLED (1<<2)
111
112 __u32 num_properties;
113 __u64 properties_ptr;
114};
115
116#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
117#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
118
119struct drm_i915_perf_record_header {
120 __u32 type;
121 __u16 pad;
122 __u16 size;
123};
124
125enum drm_i915_perf_record_type {
126 DRM_I915_PERF_RECORD_SAMPLE = 1,
127 DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2,
128 DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
129
130 DRM_I915_PERF_RECORD_MAX /* non-ABI */
131};
132#endif /* !DRM_I915_PERF_OPEN */
133
134static struct {
135 const char *name;
136 size_t size;
137 int a_off; /* bytes */
138 int n_a;
139 int first_a;
140 int b_off;
141 int n_b;
142 int c_off;
143 int n_c;
144} oa_formats[I915_OA_FORMAT_MAX] = {
145 [I915_OA_FORMAT_A13] = {
146 "A13", .size = 64,
147 .a_off = 12, .n_a = 13 },
148 [I915_OA_FORMAT_A29] = {
149 "A29", .size = 128,
150 .a_off = 12, .n_a = 29 },
151 [I915_OA_FORMAT_A13_B8_C8] = {
152 "A13_B8_C8", .size = 128,
153 .a_off = 12, .n_a = 13,
154 .b_off = 64, .n_b = 8,
155 .c_off = 96, .n_c = 8 },
156 [I915_OA_FORMAT_A45_B8_C8] = {
157 "A45_B8_C8", .size = 256,
158 .a_off = 12, .n_a = 45,
159 .b_off = 192, .n_b = 8,
160 .c_off = 224, .n_c = 8 },
161 [I915_OA_FORMAT_B4_C8] = {
162 "B4_C8", .size = 64,
163 .b_off = 16, .n_b = 4,
164 .c_off = 32, .n_c = 8 },
165 [I915_OA_FORMAT_B4_C8_A16] = {
166 "B4_C8_A16", .size = 128,
167 .b_off = 16, .n_b = 4,
168 .c_off = 32, .n_c = 8,
169 .a_off = 60, .n_a = 16, .first_a = 29 },
170 [I915_OA_FORMAT_C4_B8] = {
171 "C4_B8", .size = 64,
172 .c_off = 16, .n_c = 4,
173 .b_off = 28, .n_b = 8 },
174};
175
/* A counters that the tests skip, indexed by A counter number
 * (true == skip this counter).
 */
static bool hsw_undefined_a_counters[45] = {
	[4]  = true, [6]  = true,
	[9]  = true, [11] = true,
	[14] = true, [16] = true,
	[19] = true, [21] = true,
	[24] = true, [26] = true,
	[29] = true, [31] = true,
	[34] = true,
	[43] = true, [44] = true,
};
193
194static int drm_fd = -1;
195static uint32_t devid;
196static int device = -1;
197
198static uint64_t hsw_render_basic_id = UINT64_MAX;
199static uint64_t gt_min_freq_mhz_saved = 0;
200static uint64_t gt_max_freq_mhz_saved = 0;
201static uint64_t gt_min_freq_mhz = 0;
202static uint64_t gt_max_freq_mhz = 0;
203
204static uint64_t timestamp_frequency = 12500000;
205
206static igt_render_copyfunc_t render_copy = NULL;
207
208static int
209__perf_open(int fd, struct drm_i915_perf_open_param *param)
210{
211 int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
212
213 igt_assert(ret >= 0);
214 errno = 0;
215
216 return ret;
217}
218
219static int
220lookup_format(int i915_perf_fmt_id)
221{
222 igt_assert(i915_perf_fmt_id < I915_OA_FORMAT_MAX);
223 igt_assert(oa_formats[i915_perf_fmt_id].name);
224
225 return i915_perf_fmt_id;
226}
227
/* Try to read an unsigned 64bit integer from the text file @file.
 *
 * Returns false if the file can't be opened. Otherwise asserts that the
 * read succeeded, parses the contents with strtoull() (base auto-detected)
 * into @val and returns true.
 */
static bool
try_read_u64_file(const char *file, uint64_t *val)
{
	char buf[32];
	ssize_t n;
	int fd, read_errno;

	fd = open(file, O_RDONLY);
	if (fd < 0)
		return false;

	do {
		n = read(fd, buf, sizeof(buf) - 1);
	} while (n < 0 && errno == EINTR);

	/* Close before asserting so that a failed assertion can't leak the
	 * descriptor, while keeping read()'s errno for the failure report.
	 */
	read_errno = errno;
	close(fd);
	errno = read_errno;

	igt_assert(n >= 0);

	buf[n] = '\0';
	*val = strtoull(buf, NULL, 0);

	return true;
}
249
/* Read an unsigned 64bit integer from @file, asserting that the file
 * could be opened and read.
 */
static uint64_t
read_u64_file(const char *file)
{
	uint64_t val;

	igt_assert_eq(try_read_u64_file(file, &val), true);
	return val;
}
259
/* Write @val, formatted as a decimal string, to the existing file @file.
 *
 * Asserts that the file can be opened for writing and that the whole
 * string was written by a single write() (retried only on EINTR).
 */
static void
write_u64_file(const char *file, uint64_t val)
{
	char buf[32];
	int fd, len, ret;

	fd = open(file, O_WRONLY);
	igt_assert(fd >= 0);

	len = snprintf(buf, sizeof(buf), "%"PRIu64, val);
	igt_assert(len > 0);

	do {
		ret = write(fd, buf, len);
	} while (ret < 0 && errno == EINTR);
	igt_assert_eq(ret, len);

	close(fd);
}
278
279static uint64_t
280sysfs_read(const char *file)
281{
282 char buf[512];
283
284 snprintf(buf, sizeof(buf), "/sys/class/drm/card%d/%s", device, file);
285
286 return read_u64_file(buf);
287}
288
289static void
290sysfs_write(const char *file, uint64_t val)
291{
292 char buf[512];
293
294 snprintf(buf, sizeof(buf), "/sys/class/drm/card%d/%s", device, file);
295
296 write_u64_file(buf, val);
297}
298
/* Look up the value of a "<key>: <value>" line in the debugfs file @file.
 *
 * Requires (skips otherwise) that the file can be opened. Returns a newly
 * allocated copy of the text following "<key>: " on the first matching
 * line, with the trailing newline removed; the caller must free() it.
 * Fails an assertion if no line matches @key.
 */
static char *
read_debugfs_record(const char *file, const char *key)
{
	FILE *fp;
	char *line = NULL;
	size_t line_buf_size = 0;
	ssize_t len;
	size_t key_len = strlen(key);
	char *value = NULL;
	bool found = false;

	fp = igt_debugfs_fopen(file, "r");
	igt_require(fp);

	while ((len = getline(&line, &line_buf_size, fp)) > 0) {
		if (line[len - 1] == '\n')
			line[len - 1] = '\0';

		/* A successful strncmp() guarantees the line holds at least
		 * key_len characters, so the two lookups below stay within
		 * the NUL terminated line.
		 */
		if (strncmp(key, line, key_len) == 0 &&
		    line[key_len] == ':' &&
		    line[key_len + 1] == ' ')
		{
			value = strdup(line + key_len + 2);
			found = true;
			break;
		}
	}

	/* Release everything before (possibly) failing so that a missing
	 * key doesn't leak the line buffer and the stream.
	 */
	free(line);
	fclose(fp);

	if (!found)
		igt_assert(!"reached");

	return value;
}
333
/* Look up @key in the debugfs file @file and parse its value as an
 * unsigned 64bit integer (base auto-detected by strtoull()).
 *
 * Skips the test if no value string was returned.
 */
static uint64_t
read_debugfs_u64_record(const char *file, const char *key)
{
	uint64_t val;
	char *str_val;

	str_val = read_debugfs_record(file, key);
	igt_require(str_val);

	val = strtoull(str_val, NULL, 0);

	free(str_val);
	return val;
}
347
348static bool
349lookup_hsw_render_basic_id(void)
350{
351 char buf[256];
352
353 igt_assert_neq(device, -1);
354
355 snprintf(buf, sizeof(buf),
356 "/sys/class/drm/card%d/metrics/403d8832-1a27-4aa6-a64e-f5389ce7b212/id",
357 device);
358
359 return try_read_u64_file(buf, &hsw_render_basic_id);
360}
361
362static void
363gt_frequency_range_save(void)
364{
365 gt_min_freq_mhz_saved = sysfs_read("gt_min_freq_mhz");
366 gt_max_freq_mhz_saved = sysfs_read("gt_max_freq_mhz");
367
368 gt_min_freq_mhz = gt_min_freq_mhz_saved;
369 gt_max_freq_mhz = gt_max_freq_mhz_saved;
370}
371
372static void
373gt_frequency_pin(int gt_freq_mhz)
374{
375 igt_debug("requesting pinned GT freq = %dmhz\n", gt_freq_mhz);
376
377 if (gt_freq_mhz > gt_max_freq_mhz) {
378 sysfs_write("gt_max_freq_mhz", gt_freq_mhz);
379 sysfs_write("gt_min_freq_mhz", gt_freq_mhz);
380 } else {
381 sysfs_write("gt_min_freq_mhz", gt_freq_mhz);
382 sysfs_write("gt_max_freq_mhz", gt_freq_mhz);
383 }
384 gt_min_freq_mhz = gt_freq_mhz;
385 gt_max_freq_mhz = gt_freq_mhz;
386}
387
388static void
389gt_frequency_range_restore(void)
390{
391 igt_debug("restoring GT frequency range: min = %dmhz, max =%dmhz, current: min=%dmhz, max=%dmhz\n",
392 (int)gt_min_freq_mhz_saved,
393 (int)gt_max_freq_mhz_saved,
394 (int)gt_min_freq_mhz,
395 (int)gt_max_freq_mhz);
396
397 /* Assume current min/max are the same */
398 if (gt_min_freq_mhz_saved > gt_max_freq_mhz) {
399 sysfs_write("gt_max_freq_mhz", gt_max_freq_mhz_saved);
400 sysfs_write("gt_min_freq_mhz", gt_min_freq_mhz_saved);
401 } else {
402 sysfs_write("gt_min_freq_mhz", gt_min_freq_mhz_saved);
403 sysfs_write("gt_max_freq_mhz", gt_max_freq_mhz_saved);
404 }
405
406 gt_min_freq_mhz = gt_min_freq_mhz_saved;
407 gt_max_freq_mhz = gt_max_freq_mhz_saved;
408}
409
410static uint64_t
411timebase_scale(uint32_t u32_delta)
412{
413 return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency;
414}
415
416/* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
417 * control parameter dev.i915.perf_stream_paranoid == 0 */
418static void
419test_system_wide_paranoid(void)
420{
421 igt_fork(child, 1) {
422 uint64_t properties[] = {
423 /* Include OA reports in samples */
424 DRM_I915_PERF_PROP_SAMPLE_OA, true,
425
426 /* OA unit configuration */
427 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
428 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
429 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
430 };
431 struct drm_i915_perf_open_param param = {
432 .flags = I915_PERF_FLAG_FD_CLOEXEC |
433 I915_PERF_FLAG_FD_NONBLOCK,
434 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500435 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000436 };
437
438 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
439
440 igt_drop_root();
441
442 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
443 }
444
445 igt_waitchildren();
446
447 igt_fork(child, 1) {
448 uint64_t properties[] = {
449 /* Include OA reports in samples */
450 DRM_I915_PERF_PROP_SAMPLE_OA, true,
451
452 /* OA unit configuration */
453 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
454 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
455 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
456 };
457 struct drm_i915_perf_open_param param = {
458 .flags = I915_PERF_FLAG_FD_CLOEXEC |
459 I915_PERF_FLAG_FD_NONBLOCK,
460 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500461 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000462 };
463 int stream_fd;
464
465 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
466
467 igt_drop_root();
468
469 stream_fd = __perf_open(drm_fd, &param);
470 close(stream_fd);
471 }
472
473 igt_waitchildren();
474
475 /* leave in paranoid state */
476 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
477}
478
479static void
480test_invalid_open_flags(void)
481{
482 uint64_t properties[] = {
483 /* Include OA reports in samples */
484 DRM_I915_PERF_PROP_SAMPLE_OA, true,
485
486 /* OA unit configuration */
487 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
488 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
489 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
490 };
491 struct drm_i915_perf_open_param param = {
492 .flags = ~0, /* Undefined flag bits set! */
493 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500494 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000495 };
496
497 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
498}
499
500static void
501test_invalid_oa_metric_set_id(void)
502{
503 uint64_t properties[] = {
504 /* Include OA reports in samples */
505 DRM_I915_PERF_PROP_SAMPLE_OA, true,
506
507 /* OA unit configuration */
508 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
509 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
510 DRM_I915_PERF_PROP_OA_METRICS_SET, UINT64_MAX,
511 };
512 struct drm_i915_perf_open_param param = {
513 .flags = I915_PERF_FLAG_FD_CLOEXEC |
514 I915_PERF_FLAG_FD_NONBLOCK,
515 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500516 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000517 };
518 int stream_fd;
519
520 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
521
522 properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
523 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
524
525 /* Check that we aren't just seeing false positives... */
526 properties[ARRAY_SIZE(properties) - 1] = hsw_render_basic_id;
527 stream_fd = __perf_open(drm_fd, &param);
528 close(stream_fd);
529
530 /* There's no valid default OA metric set ID... */
531 param.num_properties--;
532 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
533}
534
535static void
536test_invalid_oa_format_id(void)
537{
538 uint64_t properties[] = {
539 /* Include OA reports in samples */
540 DRM_I915_PERF_PROP_SAMPLE_OA, true,
541
542 /* OA unit configuration */
543 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
544 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
545 DRM_I915_PERF_PROP_OA_FORMAT, UINT64_MAX,
546 };
547 struct drm_i915_perf_open_param param = {
548 .flags = I915_PERF_FLAG_FD_CLOEXEC |
549 I915_PERF_FLAG_FD_NONBLOCK,
550 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500551 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000552 };
553 int stream_fd;
554
555 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
556
557 properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
558 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
559
560 /* Check that we aren't just seeing false positives... */
561 properties[ARRAY_SIZE(properties) - 1] = I915_OA_FORMAT_A45_B8_C8;
562 stream_fd = __perf_open(drm_fd, &param);
563 close(stream_fd);
564
565 /* There's no valid default OA format... */
566 param.num_properties--;
567 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
568}
569
570static void
571test_missing_sample_flags(void)
572{
573 uint64_t properties[] = {
574 /* No _PROP_SAMPLE_xyz flags */
575
576 /* OA unit configuration */
577 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
578 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
579 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
580 };
581 struct drm_i915_perf_open_param param = {
582 .flags = I915_PERF_FLAG_FD_CLOEXEC,
583 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500584 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000585 };
586
587 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
588}
589
590static void
591read_2_oa_reports(int stream_fd,
592 int format_id,
593 int exponent,
594 uint32_t *oa_report0,
595 uint32_t *oa_report1,
596 bool timer_only)
597{
598 size_t format_size = oa_formats[format_id].size;
599 size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
600 format_size);
601 const struct drm_i915_perf_record_header *header;
602 uint32_t exponent_mask = (1 << (exponent + 1)) - 1;
603
604 /* Note: we allocate a large buffer so that each read() iteration
605 * should scrape *all* pending records.
606 *
607 * The largest buffer the OA unit supports is 16MB and the smallest
608 * OA report format is 64bytes allowing up to 262144 reports to
609 * be buffered.
610 *
611 * Being sure we are fetching all buffered reports allows us to
612 * potentially throw away / skip all reports whenever we see
613 * a _REPORT_LOST notification as a way of being sure are
614 * measurements aren't skewed by a lost report.
615 *
616 * Note: that is is useful for some tests but also not something
617 * applications would be expected to resort to. Lost reports are
618 * somewhat unpredictable but typically don't pose a problem - except
619 * to indicate that the OA unit may be over taxed if lots of reports
620 * are being lost.
621 */
622 int buf_size = 262144 * (64 + sizeof(struct drm_i915_perf_record_header));
623 uint8_t *buf = malloc(buf_size);
624 int n = 0;
625
626 for (int i = 0; i < 1000; i++) {
627 ssize_t len;
628
629 while ((len = read(stream_fd, buf, buf_size)) < 0 &&
630 errno == EINTR)
631 ;
632
633 igt_assert(len > 0);
634
635 for (size_t offset = 0; offset < len; offset += header->size) {
636 const uint32_t *report;
637
638 header = (void *)(buf + offset);
639
640 igt_assert_eq(header->pad, 0); /* Reserved */
641
642 /* Currently the only test that should ever expect to
643 * see a _BUFFER_LOST error is the buffer_fill test,
644 * otherwise something bad has probably happened...
645 */
646 igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
647
648 /* At high sampling frequencies the OA HW might not be
649 * able to cope with all write requests and will notify
650 * us that a report was lost. We restart our read of
651 * two sequential reports due to the timeline blip this
652 * implies
653 */
654 if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
655 igt_debug("read restart: OA trigger collision / report lost\n");
656 n = 0;
657
658 /* XXX: break, because we don't know where
659 * within the series of already read reports
660 * there could be a blip from the lost report.
661 */
662 break;
663 }
664
665 /* Currently the only other record type expected is a
666 * _SAMPLE. Notably this test will need updating if
667 * i915-perf is extended in the future with additional
668 * record types.
669 */
670 igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
671
672 igt_assert_eq(header->size, sample_size);
673
674 report = (const void *)(header + 1);
675
676 igt_debug("read report: reason = %x, timestamp = %x, exponent mask=%x\n",
677 report[0], report[1], exponent_mask);
678
679 /* Don't expect zero for timestamps */
680 igt_assert_neq(report[1], 0);
681
682 if (timer_only) {
683 /* For Haswell we don't have a documented
684 * report reason field (though empirically
685 * report[0] bit 10 does seem to correlate with
686 * a timer trigger reason) so we instead infer
687 * which reports are timer triggered by
688 * checking if the least significant bits are
689 * zero and the exponent bit is set.
690 */
691 if ((report[1] & exponent_mask) != (1 << exponent)) {
692 igt_debug("skipping non timer report reason=%x\n",
693 report[0]);
694
695 /* Also assert our hypothesis about the
696 * reason bit...
697 */
698 igt_assert_eq(report[0] & (1 << 10), 0);
699 continue;
700 }
701 }
702
703 if (n++ == 0)
704 memcpy(oa_report0, report, format_size);
705 else {
706 memcpy(oa_report1, report, format_size);
707 free(buf);
708 return;
709 }
710 }
711 }
712
713 free(buf);
714
715 igt_assert(!"reached");
716}
717
718static void
719open_and_read_2_oa_reports(int format_id,
720 int exponent,
721 uint32_t *oa_report0,
722 uint32_t *oa_report1,
723 bool timer_only)
724{
725 uint64_t properties[] = {
726 /* Include OA reports in samples */
727 DRM_I915_PERF_PROP_SAMPLE_OA, true,
728
729 /* OA unit configuration */
730 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
731 DRM_I915_PERF_PROP_OA_FORMAT, format_id,
732 DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
733
734 };
735 struct drm_i915_perf_open_param param = {
736 .flags = I915_PERF_FLAG_FD_CLOEXEC,
737 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -0500738 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +0000739 };
740 int stream_fd = __perf_open(drm_fd, &param);
741
742 read_2_oa_reports(stream_fd, format_id, exponent,
743 oa_report0, oa_report1, timer_only);
744
745 close(stream_fd);
746}
747
748static void
749print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
750{
751 uint32_t *a0, *b0, *c0;
752 uint32_t *a1, *b1, *c1;
753
754 /* Not ideal naming here with a0 or a1
755 * differentiating report0 or 1 not A counter 0 or 1....
756 */
757 a0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[fmt].a_off);
758 b0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[fmt].b_off);
759 c0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[fmt].c_off);
760
761 a1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[fmt].a_off);
762 b1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[fmt].b_off);
763 c1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[fmt].c_off);
764
765 igt_debug("TIMESTAMP: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
766 oa_report0[1], oa_report1[1], oa_report1[1] - oa_report0[1]);
767
768 if (oa_formats[fmt].n_c) {
769 igt_debug("CLOCK: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
770 c0[2], c1[2], c1[2] - c0[2]);
771 } else
772 igt_debug("CLOCK = N/A\n");
773
774 for (int j = oa_formats[fmt].first_a;
775 j < oa_formats[fmt].n_a;
776 j++)
777 {
778 uint32_t delta = a1[j] - a0[j];
779
780 if (hsw_undefined_a_counters[j])
781 continue;
782
783 igt_debug("A%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
784 j, a0[j], a1[j], delta);
785 }
786
787 for (int j = 0; j < oa_formats[fmt].n_b; j++) {
788 uint32_t delta = b1[j] - b0[j];
789 igt_debug("B%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
790 j, b0[j], b1[j], delta);
791 }
792
793 for (int j = 0; j < oa_formats[fmt].n_c; j++) {
794 uint32_t delta = c1[j] - c0[j];
795 igt_debug("C%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
796 j, c0[j], c1[j], delta);
797 }
798}
799
800static void
801test_oa_formats(void)
802{
803 int oa_exponent = 13;
804
805 for (int i = 0; i < ARRAY_SIZE(oa_formats); i++) {
806 uint32_t oa_report0[64];
807 uint32_t oa_report1[64];
808 uint32_t *a0, *b0, *c0;
809 uint32_t *a1, *b1, *c1;
810 uint32_t time_delta;
811 uint32_t clock_delta;
812 uint32_t max_delta;
813
814 if (!oa_formats[i].name) /* sparse, indexed by ID */
815 continue;
816
817 igt_debug("Checking OA format %s\n", oa_formats[i].name);
818
819 open_and_read_2_oa_reports(i,
820 oa_exponent,
821 oa_report0,
822 oa_report1,
823 false); /* timer reports only */
824
825 print_reports(oa_report0, oa_report1, i);
826
827 a0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[i].a_off);
828 b0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[i].b_off);
829 c0 = (uint32_t *)(((uint8_t *)oa_report0) + oa_formats[i].c_off);
830
831 a1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[i].a_off);
832 b1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[i].b_off);
833 c1 = (uint32_t *)(((uint8_t *)oa_report1) + oa_formats[i].c_off);
834
835 time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
836 igt_assert_neq(time_delta, 0);
837
838 /* C2 corresponds to a clock counter for the Haswell render
839 * basic metric set but it's not included in all of the
840 * formats.
841 */
842 if (oa_formats[i].n_c) {
843 uint64_t freq;
844
845 /* The first report might have a clock count of zero
846 * but we wouldn't expect that in the second report...
847 */
848 igt_assert_neq(c1[2], 0);
849
850 clock_delta = c1[2] - c0[2];
851 igt_assert_neq(clock_delta, 0);
852
853 freq = ((uint64_t)clock_delta * 1000) / time_delta;
854 igt_debug("freq = %"PRIu64"\n", freq);
855
856 igt_assert(freq <= gt_max_freq_mhz);
857 } else {
858 /* Assume running at max freq for sake of
859 * below sanity check on counters... */
860 clock_delta = (gt_max_freq_mhz *
861 (uint64_t)time_delta) / 1000;
862 }
863
864 igt_debug("clock delta = %"PRIu32"\n", clock_delta);
865
866 /* The maximum rate for any HSW counter =
867 * clock_delta * 40 EUs
868 *
869 * Sanity check that no counters exceed this delta.
870 */
871 max_delta = clock_delta * 40;
872
873 for (int j = oa_formats[i].first_a;
874 j < oa_formats[i].n_a;
875 j++)
876 {
877 uint32_t delta = a1[j] - a0[j];
878
879 if (hsw_undefined_a_counters[j])
880 continue;
881
882 igt_debug("A%d: delta = %"PRIu32"\n", j, delta);
883 igt_assert(delta <= max_delta);
884 }
885
886 for (int j = 0; j < oa_formats[i].n_b; j++) {
887 uint32_t delta = b1[j] - b0[j];
888 igt_debug("B%d: delta = %"PRIu32"\n", j, delta);
889 igt_assert(delta <= max_delta);
890 }
891
892 for (int j = 0; j < oa_formats[i].n_c; j++) {
893 uint32_t delta = c1[j] - c0[j];
894 igt_debug("C%d: delta = %"PRIu32"\n", j, delta);
895 igt_assert(delta <= max_delta);
896 }
897 }
898}
899
900static void
901test_oa_exponents(int gt_freq_mhz)
902{
903 /* This test tries to use the sysfs interface for pinning the GT
904 * frequency so we have another point of reference for comparing with
905 * the clock frequency as derived from OA reports.
906 *
907 * This test has been finicky to stabilise while the
908 * gt_min/max_freq_mhz files in sysfs don't seem to be a reliable
909 * mechanism for fixing the gpu frequency.
910 *
911 * Since these unit tests are focused on the OA unit not the ability to
912 * pin the frequency via sysfs we make the test account for pinning not
913 * being reliable and read back the current frequency for each
914 * iteration of this test to take this into account.
915 */
916 gt_frequency_pin(gt_freq_mhz);
917
918 igt_debug("Testing OA timer exponents with requested GT frequency = %dmhz\n",
919 gt_freq_mhz);
920
921 /* It's asking a lot to sample with a 160 nanosecond period and the
922 * test can fail due to buffer overflows if it wasn't possible to
923 * keep up, so we don't start from an exponent of zero...
924 */
925 for (int i = 2; i < 20; i++) {
926 uint32_t expected_timestamp_delta;
927 uint32_t timestamp_delta;
928 uint32_t oa_report0[64];
929 uint32_t oa_report1[64];
930 uint32_t *c0, *c1;
931 uint32_t time_delta;
932 uint32_t clock_delta;
933 uint32_t freq;
934 int n_tested = 0;
935 int n_freq_matches = 0;
936
937 /* The exponent is effectively selecting a bit in the timestamp
938 * to trigger reports on and so in practice we expect the raw
939 * timestamp deltas for periodic reports to exactly match the
940 * value of next bit.
941 */
942 expected_timestamp_delta = 2 << i;
943
944 for (int j = 0; n_tested < 10 && j < 100; j++) {
945 int gt_freq_mhz_0, gt_freq_mhz_1;
946 int c_off;
947
948 gt_freq_mhz_0 = sysfs_read("gt_act_freq_mhz");
949
950 igt_debug("ITER %d: testing OA exponent %d with sysfs GT freq = %dmhz\n",
951 j, i, gt_freq_mhz_0);
952
953 open_and_read_2_oa_reports(I915_OA_FORMAT_A45_B8_C8,
954 i, /* exponent */
955 oa_report0,
956 oa_report1,
957 true); /* timer triggered
958 reports only */
959
960 gt_freq_mhz_1 = sysfs_read("gt_act_freq_mhz");
961
962 /* If it looks like the frequency has changed according
963 * to sysfs then skip looking at this pair of reports
964 */
965 if (gt_freq_mhz_0 != gt_freq_mhz_1) {
966 igt_debug("skipping OA reports pair due to GT frequency change according to sysfs\n");
967 continue;
968 }
969
970 timestamp_delta = oa_report1[1] - oa_report0[1];
971 igt_assert_neq(timestamp_delta, 0);
972
973 if (timestamp_delta != expected_timestamp_delta) {
974 igt_debug("timestamp0 = %u/0x%x\n",
975 oa_report0[1], oa_report0[1]);
976 igt_debug("timestamp1 = %u/0x%x\n",
977 oa_report1[1], oa_report1[1]);
978 }
979
980 igt_assert_eq(timestamp_delta, expected_timestamp_delta);
981
982 /* NB: for the render basic metric set opened above by
983 * open_and_read_2_oa_reports(), the C2 counter is
984 * configured as the gpu clock counter...
985 */
986 c_off = oa_formats[I915_OA_FORMAT_A45_B8_C8].c_off;
987 igt_assert(c_off);
988 c0 = (uint32_t *)(((uint8_t *)oa_report0) + c_off);
989 c1 = (uint32_t *)(((uint8_t *)oa_report1) + c_off);
990 clock_delta = c1[2] - c0[2];
991
992 time_delta = timebase_scale(timestamp_delta);
993
994 freq = ((uint64_t)clock_delta * 1000) / time_delta;
995 igt_debug("ITER %d: time delta = %"PRIu32"(ns) clock delta = %"PRIu32" freq = %"PRIu32"(mhz)\n",
996 j, time_delta, clock_delta, freq);
997
998 if (freq == gt_freq_mhz_1)
999 n_freq_matches++;
1000
1001 n_tested++;
1002 }
1003
1004 if (n_tested < 10)
1005 igt_debug("sysfs frequency pinning too unstable for cross-referencing with OA derived frequency");
1006 igt_assert_eq(n_tested, 10);
1007
1008 igt_debug("number of iterations with expected clock frequency = %d\n",
1009 n_freq_matches);
1010
1011 /* Don't assert the calculated frequency for extremely short
1012 * durations.
1013 *
1014 * Allow some mismatches since can't be can't be sure about
1015 * frequency changes between sysfs reads.
1016 */
1017 if (i > 3)
1018 igt_assert(n_freq_matches >= 7);
1019 }
1020
1021 gt_frequency_range_restore();
1022}
1023
1024/* The OA exponent selects a timestamp counter bit to trigger reports on.
1025 *
1026 * With a 64bit timestamp and least significant bit approx == 80ns then the MSB
1027 * equates to > 40 thousand years and isn't exposed via the i915 perf interface.
1028 *
1029 * The max exponent exposed is expected to be 31, which is still a fairly
1030 * ridiculous period (>5min) but is the maximum exponent where it's still
1031 * possible to use periodic sampling as a means for tracking the overflow of
1032 * 32bit OA report timestamps.
1033 */
1034static void
1035test_invalid_oa_exponent(void)
1036{
1037 uint64_t properties[] = {
1038 /* Include OA reports in samples */
1039 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1040
1041 /* OA unit configuration */
1042 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1043 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1044 DRM_I915_PERF_PROP_OA_EXPONENT, 31, /* maximum exponent expected
1045 to be accepted */
1046 };
1047 struct drm_i915_perf_open_param param = {
1048 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1049 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001050 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001051 };
1052 int stream_fd = __perf_open(drm_fd, &param);
1053
1054 close(stream_fd);
1055
1056 for (int i = 32; i < 65; i++) {
1057 properties[7] = i;
1058 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1059 }
1060}
1061
1062/* Return the largest OA exponent that will still result in a sampling
1063 * frequency higher than the given frequency.
1064 */
1065static int
1066max_oa_exponent_for_higher_freq(uint64_t freq)
1067{
1068 /* NB: timebase_scale() takes a uint32_t and an exponent of 30
1069 * would already represent a period of ~3 minutes so there's
1070 * really no need to consider higher exponents.
1071 */
1072 for (int i = 0; i < 30; i++) {
1073 uint64_t oa_period = timebase_scale(2 << i);
1074 uint32_t oa_freq = NSEC_PER_SEC / oa_period;
1075
1076 if (oa_freq <= freq)
1077 return max(0, i - 1);
1078 }
1079
1080 igt_assert(!"reached");
1081 return -1;
1082}
1083
1084/* The lowest periodic sampling exponent equates to a period of 160 nanoseconds
1085 * or a frequency of 6.25MHz which is only possible to request as root by
1086 * default. By default the maximum OA sampling rate is 100KHz
1087 */
1088static void
1089test_low_oa_exponent_permissions(void)
1090{
1091 int max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");
1092 int bad_exponent = max_oa_exponent_for_higher_freq(max_freq);
1093 int ok_exponent = bad_exponent + 1;
1094 uint64_t properties[] = {
1095 /* Include OA reports in samples */
1096 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1097
1098 /* OA unit configuration */
1099 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1100 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1101 DRM_I915_PERF_PROP_OA_EXPONENT, bad_exponent,
1102 };
1103 struct drm_i915_perf_open_param param = {
1104 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1105 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001106 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001107 };
1108 uint64_t oa_period, oa_freq;
1109
1110 igt_assert_eq(max_freq, 100000);
1111
1112 /* Avoid EACCES errors opening a stream without CAP_SYS_ADMIN */
1113 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
1114
1115 igt_fork(child, 1) {
1116 igt_drop_root();
1117
1118 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
1119 }
1120
1121 igt_waitchildren();
1122
1123 properties[7] = ok_exponent;
1124
1125 igt_fork(child, 1) {
1126 int stream_fd;
1127
1128 igt_drop_root();
1129
1130 stream_fd = __perf_open(drm_fd, &param);
1131 close(stream_fd);
1132 }
1133
1134 igt_waitchildren();
1135
1136 oa_period = timebase_scale(2 << ok_exponent);
1137 oa_freq = NSEC_PER_SEC / oa_period;
1138 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
1139
1140 igt_fork(child, 1) {
1141 igt_drop_root();
1142
1143 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
1144 }
1145
1146 igt_waitchildren();
1147
1148 /* restore the defaults */
1149 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
1150 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1151}
1152
1153static void
1154test_per_context_mode_unprivileged(void)
1155{
1156 uint64_t properties[] = {
1157 /* Single context sampling */
1158 DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
1159
1160 /* Include OA reports in samples */
1161 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1162
1163 /* OA unit configuration */
1164 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1165 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1166 DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */
1167 };
1168 struct drm_i915_perf_open_param param = {
1169 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1170 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001171 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001172 };
1173
1174 /* should be default, but just to be sure... */
1175 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1176
1177 igt_fork(child, 1) {
1178 drm_intel_context *context;
1179 drm_intel_bufmgr *bufmgr;
1180 int stream_fd;
1181 uint32_t ctx_id = 0xffffffff; /* invalid id */
1182 int ret;
1183
1184 igt_drop_root();
1185
1186 bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1187 context = drm_intel_gem_context_create(bufmgr);
1188
1189 igt_assert(context);
1190
1191 ret = drm_intel_gem_context_get_id(context, &ctx_id);
1192 igt_assert_eq(ret, 0);
1193 igt_assert_neq(ctx_id, 0xffffffff);
1194
1195 properties[1] = ctx_id;
1196
1197 stream_fd = __perf_open(drm_fd, &param);
1198 close(stream_fd);
1199
1200 drm_intel_gem_context_destroy(context);
1201 drm_intel_bufmgr_destroy(bufmgr);
1202 }
1203
1204 igt_waitchildren();
1205}
1206
/* Return the current CLOCK_MONOTONIC time in nanoseconds.
 *
 * The seconds field is widened to 64 bits before scaling: multiplying a
 * 32bit time_t by 1000000000 would overflow (undefined behaviour for a
 * signed type) after only a couple of seconds of uptime.
 */
static int64_t
get_time(void)
{
	/* Zero-initialised so a failing clock_gettime() yields 0 rather than
	 * an indeterminate value.
	 */
	struct timespec ts = { 0 };

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return (int64_t)ts.tv_sec * INT64_C(1000000000) + ts.tv_nsec;
}
1216
1217/* Note: The interface doesn't currently provide strict guarantees or control
1218 * over the upper bound for how long it might take for a POLLIN event after
1219 * some OA report is written by the OA unit.
1220 *
1221 * The plan is to add a property later that gives some control over the maximum
1222 * latency, but for now we expect it is tuned for a fairly low latency
1223 * suitable for applications wanting to provide live feedback for captured
1224 * metrics.
1225 *
1226 * At the time of writing this test the driver was using a fixed 200Hz hrtimer
1227 * regardless of the OA sampling exponent.
1228 *
1229 * There is no lower bound since a stream configured for periodic sampling may
1230 * still contain other automatically triggered reports.
1231 *
1232 * What we try and check for here is that blocking reads don't return EAGAIN
1233 * and that we aren't spending any significant time burning the cpu in
1234 * kernelspace.
1235 */
1236static void
1237test_blocking(void)
1238{
1239 /* 40 milliseconds
1240 *
1241 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
1242 * scheduling (liable to kick in when we make blocking poll()s/reads)
1243 * from interfering with the test.
1244 */
1245 int oa_exponent = 18;
1246 uint64_t properties[] = {
1247 /* Include OA reports in samples */
1248 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1249
1250 /* OA unit configuration */
1251 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1252 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1253 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1254 };
1255 struct drm_i915_perf_open_param param = {
1256 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1257 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001258 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001259 };
1260 int stream_fd = __perf_open(drm_fd, &param);
1261 uint8_t buf[1024 * 1024];
1262 struct tms start_times;
1263 struct tms end_times;
1264 int64_t user_ns, kernel_ns;
1265 int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
1266 int64_t start;
1267 int n = 0;
1268
1269 times(&start_times);
1270
1271 /* Loop for 600ms performing blocking reads while the HW is sampling at
1272 * ~25Hz, with the expectation that we spend most of our time blocked
1273 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
1274 * association with this process (verified by looking at stime before
1275 * and after loop).
1276 */
1277 for (start = get_time(); (get_time() - start) < 600000000; /* nop */) {
1278 int ret;
1279
1280 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
1281 errno == EINTR)
1282 ;
1283
1284 igt_assert(ret > 0);
1285
1286 n++;
1287 }
1288
1289 times(&end_times);
1290
1291 /* Using nanosecond units is fairly silly here, given the tick in-
1292 * precision - ah well, it's consistent with the get_time() units.
1293 */
1294 user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
1295 kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
1296
1297 igt_debug("%d blocking reads in 500 milliseconds, with 1KHz OA sampling\n", n);
1298 igt_debug("time in userspace = %"PRIu64"ns (start utime = %d, end = %d, ns ticks per sec = %d)\n",
1299 user_ns, (int)start_times.tms_utime, (int)end_times.tms_utime, (int)tick_ns);
1300 igt_debug("time in kernelspace = %"PRIu64"ns (start stime = %d, end = %d, ns ticks per sec = %d)\n",
1301 kernel_ns, (int)start_times.tms_stime, (int)end_times.tms_stime, (int)tick_ns);
1302
1303 /* With completely broken blocking (but also not returning an error) we
1304 * could end up with an open loop, hopefully recognisable with > 15
1305 * (600/40)iterations.
1306 */
1307 igt_assert(n <= 15);
1308
1309 /* It's a bit tricky to put a lower limit here, but we expect a
1310 * relatively low latency for seeing reports, while we don't currently
1311 * give any control over this in the api.
1312 *
1313 * Limited to a 5 millisecond latency and 45ms (worst case)
1314 * per-iteration that could give 13.3 iterations. Rounding gives a tiny
1315 * bit more latency slack (6ms)...
1316 */
1317 igt_assert(n > 13);
1318
1319 /* A bit tricky to put a number on this, but we don't expect the kernel
1320 * to use any significant cpu while waiting and given the in precision
1321 * of stime (multiple of CLK_TCK) we expect this to round to zero.
1322 */
1323 igt_assert_eq(kernel_ns, 0);
1324
1325 close(stream_fd);
1326}
1327
1328static void
1329test_polling(void)
1330{
1331 /* 40 milliseconds
1332 *
1333 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
1334 * scheduling (liable to kick in when we make blocking poll()s/reads)
1335 * from interfering with the test.
1336 */
1337 int oa_exponent = 18;
1338 uint64_t properties[] = {
1339 /* Include OA reports in samples */
1340 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1341
1342 /* OA unit configuration */
1343 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1344 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1345 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1346 };
1347 struct drm_i915_perf_open_param param = {
1348 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1349 I915_PERF_FLAG_FD_NONBLOCK,
1350 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001351 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001352 };
1353 int stream_fd = __perf_open(drm_fd, &param);
1354 uint8_t buf[1024 * 1024];
1355 struct tms start_times;
1356 struct tms end_times;
1357 int64_t user_ns, kernel_ns;
1358 int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
1359 int64_t start;
1360 int n = 0;
1361
1362 times(&start_times);
1363
1364 /* Loop for 600ms performing blocking polls while the HW is sampling at
1365 * ~25Hz, with the expectation that we spend most of our time blocked
1366 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
1367 * association with this process (verified by looking at stime before
1368 * and after loop).
1369 */
1370 for (start = get_time(); (get_time() - start) < 600000000; /* nop */) {
1371 struct pollfd pollfd = { .fd = stream_fd, .events = POLLIN };
1372 int ret;
1373
1374 while ((ret = poll(&pollfd, 1, -1)) < 0 &&
1375 errno == EINTR)
1376 ;
1377 igt_assert_eq(ret, 1);
1378 igt_assert(pollfd.revents & POLLIN);
1379
1380 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
1381 errno == EINTR)
1382 ;
1383
1384 /* Don't expect to see EAGAIN if we've had a POLLIN event
1385 *
1386 * XXX: actually this is technically overly strict since we do
1387 * knowingly allow false positive POLLIN events. At least in
1388 * the future when supporting context filtering of metrics for
1389 * Gen8+ handled in the kernel then POLLIN events may be
1390 * delivered when we know there are pending reports to process
1391 * but before we've done any filtering to know for certain that
1392 * any reports are destined to be copied to userspace.
1393 *
1394 * Still, for now it's a reasonable sanity check.
1395 */
1396 if (ret < 0)
1397 igt_debug("Unexpected error when reading after poll = %d\n", errno);
1398 igt_assert_neq(ret, -1);
1399
1400 /* At this point, after consuming pending reports (and hoping
1401 * the scheduler hasn't stopped us for too long we now
1402 * expect EAGAIN on read.
1403 */
1404 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
1405 errno == EINTR)
1406 ;
1407 igt_assert_eq(ret, -1);
1408 igt_assert_eq(errno, EAGAIN);
1409
1410 n++;
1411 }
1412
1413 times(&end_times);
1414
1415 /* Using nanosecond units is fairly silly here, given the tick in-
1416 * precision - ah well, it's consistent with the get_time() units.
1417 */
1418 user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
1419 kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
1420
1421 igt_debug("%d blocking poll()s in 600 milliseconds, with 25Hz OA sampling\n", n);
1422 igt_debug("time in userspace = %"PRIu64"ns (start utime = %d, end = %d, ns ticks per sec = %d)\n",
1423 user_ns, (int)start_times.tms_utime, (int)end_times.tms_utime, (int)tick_ns);
1424 igt_debug("time in kernelspace = %"PRIu64"ns (start stime = %d, end = %d, ns ticks per sec = %d)\n",
1425 kernel_ns, (int)start_times.tms_stime, (int)end_times.tms_stime, (int)tick_ns);
1426
1427 /* With completely broken blocking while polling (but still somehow
1428 * reporting a POLLIN event) we could end up with an open loop,
1429 * hopefully recognisable with > 15 (600/40)iterations.
1430 */
1431 igt_assert(n <= 15);
1432
1433 /* It's a bit tricky to put a lower limit here, but we expect a
1434 * relatively low latency for seeing reports, while we don't currently
1435 * give any control over this in the api.
1436 *
1437 * Limited to a 5 millisecond latency and 45ms (worst case)
1438 * per-iteration that could give 13.3 iterations. Rounding gives a tiny
1439 * bit more latency slack (6ms)...
1440 */
1441 igt_assert(n > 13);
1442
1443 /* A bit tricky to put a number on this, but we don't expect the kernel
1444 * to use any significant cpu while waiting and given the in precision
1445 * of stime (multiple of CLK_TCK) we expect this to round to zero.
1446 */
1447 igt_assert_eq(kernel_ns, 0);
1448
1449 close(stream_fd);
1450}
1451
1452static void
1453test_buffer_fill(void)
1454{
1455 int oa_exponent = 5; /* 5 micro seconds */
1456 uint64_t properties[] = {
1457 /* Include OA reports in samples */
1458 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1459
1460 /* OA unit configuration */
1461 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1462 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1463 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1464 };
1465 struct drm_i915_perf_open_param param = {
1466 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1467 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001468 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001469 };
1470 int stream_fd = __perf_open(drm_fd, &param);
1471 int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
1472 uint8_t *buf = malloc(buf_size);
1473
1474
1475 for (int i = 0; i < 5; i++) {
1476 struct drm_i915_perf_record_header *header;
1477 bool overflow_seen;
1478 int offset = 0;
1479 int len;
1480
1481 /* It should take ~330 milliseconds to fill a 16MB OA buffer with a
1482 * 5 microsecond sampling period and 256 byte reports. */
1483 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
1484
1485 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
1486 ;
1487
1488 igt_assert_neq(len, -1);
1489
1490 overflow_seen = false;
1491 for (offset = 0; offset < len; offset += header->size) {
1492 header = (void *)(buf + offset);
1493
1494 if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
1495 overflow_seen = true;
1496 }
1497
1498 igt_assert_eq(overflow_seen, true);
1499
1500 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL);
1501
1502 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
1503 ;
1504
1505 igt_assert_neq(len, -1);
1506
1507 /* expect ~ 200 records in 1 millisecond */
1508 igt_assert(len > 256 * 150);
1509
1510 overflow_seen = false;
1511 for (offset = 0; offset < len; offset += header->size) {
1512 header = (void *)(buf + offset);
1513
1514 if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
1515 overflow_seen = true;
1516 }
1517
1518 igt_assert_eq(overflow_seen, false);
1519 }
1520
1521 free(buf);
1522
1523 close(stream_fd);
1524}
1525
1526static void
1527test_enable_disable(void)
1528{
1529 int oa_exponent = 5; /* 5 micro seconds */
1530 uint64_t properties[] = {
1531 /* Include OA reports in samples */
1532 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1533
1534 /* OA unit configuration */
1535 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1536 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1537 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1538 };
1539 struct drm_i915_perf_open_param param = {
1540 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1541 I915_PERF_FLAG_DISABLED, /* Verify we start disabled */
1542 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001543 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001544 };
1545 int stream_fd = __perf_open(drm_fd, &param);
1546 int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
1547 uint8_t *buf = malloc(buf_size);
1548
1549
1550 for (int i = 0; i < 5; i++) {
1551 int len;
1552
1553 /* If the stream were enabled then it would take ~330
1554 * milliseconds to fill a 16MB OA buffer with a 5 microsecond
1555 * sampling period and 256 byte reports.
1556 *
1557 * Giving enough time for an overflow might help catch whether
1558 * the OA unit has been enabled even if the driver might at
1559 * least avoid copying reports while disabled.
1560 */
1561 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
1562
1563 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
1564 ;
1565
1566 igt_assert_eq(len, -1);
1567 igt_assert_eq(errno, EIO);
1568
1569 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
1570
1571 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL);
1572
1573 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
1574 ;
1575
1576 igt_assert_neq(len, -1);
1577
1578 /* expect ~ 200 records in 1 millisecond */
1579 igt_assert(len > 256 * 150 && len < 256 * 2000);
1580
1581 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
1582
1583 /* It's considered an error to read a stream while it's disabled
1584 * since it would block indefinitely...
1585 */
1586 len = read(stream_fd, buf, buf_size);
1587
1588 igt_assert_eq(len, -1);
1589 igt_assert_eq(errno, EIO);
1590 }
1591
1592 free(buf);
1593
1594 close(stream_fd);
1595}
1596
1597static void
1598test_short_reads(void)
1599{
1600 int oa_exponent = 5; /* 5 micro seconds */
1601 uint64_t properties[] = {
1602 /* Include OA reports in samples */
1603 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1604
1605 /* OA unit configuration */
1606 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1607 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1608 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1609 };
1610 struct drm_i915_perf_open_param param = {
1611 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1612 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001613 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001614 };
1615 size_t record_size = 256 + sizeof(struct drm_i915_perf_record_header);
1616 size_t page_size = sysconf(_SC_PAGE_SIZE);
1617 int zero_fd = open("/dev/zero", O_RDWR|O_CLOEXEC);
1618 uint8_t *pages = mmap(NULL, page_size * 2,
1619 PROT_READ|PROT_WRITE, MAP_PRIVATE, zero_fd, 0);
1620 struct drm_i915_perf_record_header *header;
1621 int stream_fd;
1622 int ret;
1623
1624 igt_assert_neq(zero_fd, -1);
1625 close(zero_fd);
1626 zero_fd = -1;
1627
1628 igt_assert(pages);
1629
1630 ret = mprotect(pages + page_size, page_size, PROT_NONE);
1631 igt_assert_eq(ret, 0);
1632
1633 stream_fd = __perf_open(drm_fd, &param);
1634
1635 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 5000000 }, NULL);
1636
1637 /* At this point there should be lots of pending reports to read */
1638
1639 /* A read that can return at least one record should result in a short
1640 * read not an EFAULT if the buffer is smaller than the requested read
1641 * size...
1642 *
1643 * Expect to see a sample record here, but at least skip over any
1644 * _RECORD_LOST notifications.
1645 */
1646 do {
1647 header = (void *)(pages + page_size - record_size);
1648 ret = read(stream_fd,
1649 header,
1650 page_size);
1651 igt_assert(ret > 0);
1652 } while (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
1653
1654 igt_assert_eq(ret, record_size);
1655
1656 /* A read that can't return a single record because it would result
1657 * in a fault on buffer overrun should result in an EFAULT error...
1658 */
1659 ret = read(stream_fd, pages + page_size - 16, page_size);
1660 igt_assert_eq(ret, -1);
1661 igt_assert_eq(errno, EFAULT);
1662
1663 /* A read that can't return a single record because the buffer is too
1664 * small should result in an ENOSPC error..
1665 *
1666 * Again, skip over _RECORD_LOST records (smaller than record_size/2)
1667 */
1668 do {
1669 header = (void *)(pages + page_size - record_size / 2);
1670 ret = read(stream_fd,
1671 header,
1672 record_size / 2);
1673 } while (ret > 0 && header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
1674
1675 igt_assert_eq(ret, -1);
1676 igt_assert_eq(errno, ENOSPC);
1677
1678 close(stream_fd);
1679
1680 munmap(pages, page_size * 2);
1681}
1682
1683static void
1684test_non_sampling_read_error(void)
1685{
1686 uint64_t properties[] = {
1687 /* XXX: even without periodic sampling we have to
1688 * specify at least one sample layout property...
1689 */
1690 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1691
1692 /* OA unit configuration */
1693 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1694 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1695
1696 /* XXX: no sampling exponent */
1697 };
1698 struct drm_i915_perf_open_param param = {
1699 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1700 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001701 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001702 };
1703 int stream_fd = __perf_open(drm_fd, &param);
1704 uint8_t buf[1024];
1705
1706 int ret = read(stream_fd, buf, sizeof(buf));
1707 igt_assert_eq(ret, -1);
1708 igt_assert_eq(errno, EIO);
1709
1710 close(stream_fd);
1711}
1712
1713/* Check that attempts to read from a stream while it is disable will return
1714 * EIO instead of blocking indefinitely.
1715 */
1716static void
1717test_disabled_read_error(void)
1718{
1719 int oa_exponent = 5; /* 5 micro seconds */
1720 uint64_t properties[] = {
1721 /* XXX: even without periodic sampling we have to
1722 * specify at least one sample layout property...
1723 */
1724 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1725
1726 /* OA unit configuration */
1727 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1728 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1729 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1730 };
1731 struct drm_i915_perf_open_param param = {
1732 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1733 I915_PERF_FLAG_DISABLED, /* XXX: open disabled */
1734 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001735 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001736 };
1737 int stream_fd = __perf_open(drm_fd, &param);
1738 uint32_t oa_report0[64];
1739 uint32_t oa_report1[64];
1740 uint32_t buf[128] = { 0 };
1741 int ret;
1742
1743
1744 ret = read(stream_fd, buf, sizeof(buf));
1745 igt_assert_eq(ret, -1);
1746 igt_assert_eq(errno, EIO);
1747
1748 close(stream_fd);
1749
1750
1751 param.flags &= ~I915_PERF_FLAG_DISABLED;
1752 stream_fd = __perf_open(drm_fd, &param);
1753
1754 read_2_oa_reports(stream_fd,
1755 I915_OA_FORMAT_A45_B8_C8,
1756 oa_exponent,
1757 oa_report0,
1758 oa_report1,
1759 false); /* not just timer reports */
1760
1761 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
1762
1763 ret = read(stream_fd, buf, sizeof(buf));
1764 igt_assert_eq(ret, -1);
1765 igt_assert_eq(errno, EIO);
1766
1767 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
1768
1769 read_2_oa_reports(stream_fd,
1770 I915_OA_FORMAT_A45_B8_C8,
1771 oa_exponent,
1772 oa_report0,
1773 oa_report1,
1774 false); /* not just timer reports */
1775
1776 close(stream_fd);
1777}
1778
1779static void
1780test_mi_rpc(void)
1781{
1782 uint64_t properties[] = {
1783 /* Note: we have to specify at least one sample property even
1784 * though we aren't interested in samples in this case.
1785 */
1786 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1787
1788 /* OA unit configuration */
1789 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1790 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1791
1792 /* Note: no OA exponent specified in this case */
1793 };
1794 struct drm_i915_perf_open_param param = {
1795 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1796 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001797 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001798 };
1799 int stream_fd = __perf_open(drm_fd, &param);
1800 drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1801 drm_intel_context *context;
1802 struct intel_batchbuffer *batch;
1803 drm_intel_bo *bo;
1804 uint32_t *report32;
1805 int ret;
1806
1807 drm_intel_bufmgr_gem_enable_reuse(bufmgr);
1808
1809 context = drm_intel_gem_context_create(bufmgr);
1810 igt_assert(context);
1811
1812 batch = intel_batchbuffer_alloc(bufmgr, devid);
1813
1814 bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
1815
1816 ret = drm_intel_bo_map(bo, true);
1817 igt_assert_eq(ret, 0);
1818
1819 memset(bo->virtual, 0x80, 4096);
1820 drm_intel_bo_unmap(bo);
1821
1822 BEGIN_BATCH(3, 1);
1823 OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
1824 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1825 0); /* offset in bytes */
1826 OUT_BATCH(0xdeadbeef); /* report ID */
1827 ADVANCE_BATCH();
1828
1829 intel_batchbuffer_flush_with_context(batch, context);
1830
1831 ret = drm_intel_bo_map(bo, false /* write enable */);
1832 igt_assert_eq(ret, 0);
1833
1834 report32 = bo->virtual;
1835 igt_assert_eq(report32[0], 0xdeadbeef); /* report ID */
1836 igt_assert_neq(report32[1], 0); /* timestamp */
1837
1838 igt_assert_neq(report32[63], 0x80808080); /* end of report */
1839 igt_assert_eq(report32[64], 0x80808080); /* after 256 byte report */
1840
1841 drm_intel_bo_unmap(bo);
1842 drm_intel_bo_unreference(bo);
1843 intel_batchbuffer_free(batch);
1844 drm_intel_gem_context_destroy(context);
1845 drm_intel_bufmgr_destroy(bufmgr);
1846 close(stream_fd);
1847}
1848
1849static void
1850scratch_buf_init(drm_intel_bufmgr *bufmgr,
1851 struct igt_buf *buf,
1852 int width, int height,
1853 uint32_t color)
1854{
1855 size_t stride = width * 4;
1856 size_t size = stride * height;
1857 drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
1858 int ret;
1859
1860 ret = drm_intel_bo_map(bo, true /* writable */);
1861 igt_assert_eq(ret, 0);
1862
1863 for (int i = 0; i < width * height; i++)
1864 ((uint32_t *)bo->virtual)[i] = color;
1865
1866 drm_intel_bo_unmap(bo);
1867
1868 buf->bo = bo;
1869 buf->stride = stride;
1870 buf->tiling = I915_TILING_NONE;
1871 buf->size = size;
1872}
1873
1874static void
1875emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
1876 drm_intel_bo *dst,
1877 int timestamp_offset,
1878 int report_dst_offset,
1879 uint32_t report_id)
1880{
1881 uint32_t pipe_ctl_flags = (PIPE_CONTROL_CS_STALL |
1882 PIPE_CONTROL_RENDER_TARGET_FLUSH |
1883 PIPE_CONTROL_WRITE_TIMESTAMP);
1884
1885 BEGIN_BATCH(5, 1);
1886 OUT_BATCH(GFX_OP_PIPE_CONTROL | (5 - 2));
1887 OUT_BATCH(pipe_ctl_flags);
1888 OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1889 timestamp_offset);
1890 OUT_BATCH(0); /* imm lower */
1891 OUT_BATCH(0); /* imm upper */
1892 ADVANCE_BATCH();
1893
1894 BEGIN_BATCH(3, 1);
1895 OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
1896 OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1897 report_dst_offset);
1898 OUT_BATCH(report_id);
1899 ADVANCE_BATCH();
1900}
1901
1902/* Tests the INTEL_performance_query use case where an unprivileged process
1903 * should be able to configure the OA unit for per-context metrics (for a
1904 * context associated with that process' drm file descriptor) and the counters
1905 * should only relate to that specific context.
1906 */
1907static void
1908test_per_ctx_mi_rpc(void)
1909{
1910 uint64_t properties[] = {
1911 DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
1912
1913 /* Note: we have to specify at least one sample property even
1914 * though we aren't interested in samples in this case
1915 */
1916 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1917
1918 /* OA unit configuration */
1919 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
1920 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
1921
1922 /* Note: no OA exponent specified in this case */
1923 };
1924 struct drm_i915_perf_open_param param = {
1925 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1926 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05001927 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00001928 };
1929
1930 /* should be default, but just to be sure... */
1931 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1932
1933 igt_fork(child, 1) {
1934 drm_intel_bufmgr *bufmgr;
1935 drm_intel_context *context0, *context1;
1936 int stream_fd;
1937 struct intel_batchbuffer *batch;
1938 struct igt_buf src, dst;
1939 drm_intel_bo *bo;
1940 uint32_t *report0_32, *report1_32;
1941 uint64_t timestamp0_64, timestamp1_64;
1942 uint32_t delta_ts64, delta_oa32;
1943 uint64_t delta_ts64_ns, delta_oa32_ns;
1944 uint32_t delta_delta;
1945 int n_samples_written;
1946 int width = 800;
1947 int height = 600;
1948 uint32_t ctx_id = 0xffffffff; /* invalid id */
1949 int ret;
1950
1951 igt_drop_root();
1952
1953 bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1954 drm_intel_bufmgr_gem_enable_reuse(bufmgr);
1955
1956 scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff);
1957 scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff);
1958
1959 batch = intel_batchbuffer_alloc(bufmgr, devid);
1960
1961 context0 = drm_intel_gem_context_create(bufmgr);
1962 igt_assert(context0);
1963
1964 context1 = drm_intel_gem_context_create(bufmgr);
1965 igt_assert(context1);
1966
1967 igt_debug("submitting warm up render_copy\n");
1968
1969 /* Submit some early, unmeasured, work to the context we want
1970 * to measure to try and catch issues with i915-perf
1971 * initializing the HW context ID for filtering.
1972 *
1973 * We do this because i915-perf single context filtering had
1974 * previously only relied on a hook into context pinning to
1975 * initialize the HW context ID, instead of also trying to
1976 * determine the HW ID while opening the stream, in case it
1977 * has already been pinned.
1978 *
1979 * This wasn't noticed by the previous unit test because we
1980 * were opening the stream while the context hadn't been
1981 * touched or pinned yet and so it worked out correctly to wait
1982 * for the pinning hook.
1983 *
1984 * Now a buggy version of i915-perf will fail to measure
1985 * anything for context0 once this initial render_copy() ends
1986 * up pinning the context since there won't ever be a pinning
1987 * hook callback.
1988 */
1989 render_copy(batch,
1990 context0,
1991 &src, 0, 0, width, height,
1992 &dst, 0, 0);
1993
1994 ret = drm_intel_gem_context_get_id(context0, &ctx_id);
1995 igt_assert_eq(ret, 0);
1996 igt_assert_neq(ctx_id, 0xffffffff);
1997 properties[1] = ctx_id;
1998
1999 igt_debug("opening i915-perf stream\n");
2000 stream_fd = __perf_open(drm_fd, &param);
2001
2002 bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
2003
2004 ret = drm_intel_bo_map(bo, true /* write enable */);
2005 igt_assert_eq(ret, 0);
2006
2007 memset(bo->virtual, 0x80, 4096);
2008 drm_intel_bo_unmap(bo);
2009
2010 emit_stall_timestamp_and_rpc(batch,
2011 bo,
2012 512 /* timestamp offset */,
2013 0, /* report dst offset */
2014 0xdeadbeef); /* report id */
2015
2016 /* Explicitly flush here (even though the render_copy() call
2017 * will itself flush before/after the copy) to clarify that
2018 * the PIPE_CONTROL + MI_RPC commands will be in a
2019 * separate batch from the copy.
2020 */
2021 intel_batchbuffer_flush_with_context(batch, context0);
2022
2023 render_copy(batch,
2024 context0,
2025 &src, 0, 0, width, height,
2026 &dst, 0, 0);
2027
2028 /* Another redundant flush to clarify batch bo is free to reuse */
2029 intel_batchbuffer_flush_with_context(batch, context0);
2030
2031 /* submit two copies on the other context to avoid a false
2032 * positive in case the driver somehow ended up filtering for
2033 * context1
2034 */
2035 render_copy(batch,
2036 context1,
2037 &src, 0, 0, width, height,
2038 &dst, 0, 0);
2039
2040 render_copy(batch,
2041 context1,
2042 &src, 0, 0, width, height,
2043 &dst, 0, 0);
2044
2045 /* And another */
2046 intel_batchbuffer_flush_with_context(batch, context1);
2047
2048 emit_stall_timestamp_and_rpc(batch,
2049 bo,
2050 520 /* timestamp offset */,
2051 256, /* report dst offset */
2052 0xbeefbeef); /* report id */
2053
2054 intel_batchbuffer_flush_with_context(batch, context0);
2055
2056 ret = drm_intel_bo_map(bo, false /* write enable */);
2057 igt_assert_eq(ret, 0);
2058
2059 report0_32 = bo->virtual;
2060 igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
2061 igt_assert_neq(report0_32[1], 0); /* timestamp */
2062
2063 report1_32 = report0_32 + 64;
2064 igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
2065 igt_assert_neq(report1_32[1], 0); /* timestamp */
2066
2067 print_reports(report0_32, report1_32,
2068 lookup_format(I915_OA_FORMAT_A45_B8_C8));
2069
2070 /* A40 == N samples written to all render targets */
2071 n_samples_written = report1_32[43] - report0_32[43];
2072 igt_debug("n samples written = %d\n", n_samples_written);
2073 igt_assert_eq(n_samples_written, width * height);
2074
2075 igt_debug("timestamp32 0 = %u\n", report0_32[1]);
2076 igt_debug("timestamp32 1 = %u\n", report1_32[1]);
2077
2078 timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
2079 timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);
2080
2081 igt_debug("timestamp64 0 = %"PRIu64"\n", timestamp0_64);
2082 igt_debug("timestamp64 1 = %"PRIu64"\n", timestamp1_64);
2083
2084 delta_ts64 = timestamp1_64 - timestamp0_64;
2085 delta_oa32 = report1_32[1] - report0_32[1];
2086
2087 /* sanity check that we can pass the delta to timebase_scale */
2088 igt_assert(delta_ts64 < UINT32_MAX);
2089 delta_oa32_ns = timebase_scale(delta_oa32);
2090 delta_ts64_ns = timebase_scale(delta_ts64);
2091
2092 igt_debug("ts32 delta = %u, = %uns\n",
2093 delta_oa32, (unsigned)delta_oa32_ns);
2094 igt_debug("ts64 delta = %u, = %uns\n",
2095 delta_ts64, (unsigned)delta_ts64_ns);
2096
2097 /* The delta as calculated via the PIPE_CONTROL timestamp or
2098 * the OA report timestamps should be almost identical but
2099 * allow a 320 nanoseconds margin.
2100 */
2101 delta_delta = delta_ts64_ns > delta_oa32_ns ?
2102 (delta_ts64_ns - delta_oa32_ns) :
2103 (delta_oa32_ns - delta_ts64_ns);
2104 igt_assert(delta_delta <= 320);
2105
2106 drm_intel_bo_unreference(src.bo);
2107 drm_intel_bo_unreference(dst.bo);
2108
2109 drm_intel_bo_unmap(bo);
2110 drm_intel_bo_unreference(bo);
2111 intel_batchbuffer_free(batch);
2112 drm_intel_gem_context_destroy(context0);
2113 drm_intel_gem_context_destroy(context1);
2114 drm_intel_bufmgr_destroy(bufmgr);
2115 close(stream_fd);
2116 }
2117
2118 igt_waitchildren();
2119}
2120
2121static void
2122test_rc6_disable(void)
2123{
2124 int oa_exponent = 13; /* 1 millisecond */
2125 uint64_t properties[] = {
2126 /* Include OA reports in samples */
2127 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2128
2129 /* OA unit configuration */
2130 DRM_I915_PERF_PROP_OA_METRICS_SET, hsw_render_basic_id,
2131 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
2132 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2133 };
2134 struct drm_i915_perf_open_param param = {
2135 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2136 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05002137 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00002138 };
2139 int stream_fd = __perf_open(drm_fd, &param);
2140 uint64_t n_events_start = read_debugfs_u64_record("i915_drpc_info",
2141 "RC6 residency since boot");
2142 uint64_t n_events_end;
2143
2144 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
2145
2146 n_events_end = read_debugfs_u64_record("i915_drpc_info",
2147 "RC6 residency since boot");
2148
2149 igt_assert_eq(n_events_end - n_events_start, 0);
2150
2151 close(stream_fd);
2152
2153 n_events_start = read_debugfs_u64_record("i915_drpc_info",
2154 "RC6 residency since boot");
2155
2156 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
2157
2158 n_events_end = read_debugfs_u64_record("i915_drpc_info",
2159 "RC6 residency since boot");
2160
2161 igt_assert_neq(n_events_end - n_events_start, 0);
2162}
2163
/* Return the reference count listed for the i915 module in /proc/modules.
 *
 * The file is scanned for the line starting with "i915 " and the two numeric
 * fields following the module name are parsed; the second one is returned.
 *
 * The line buffer and the file are always released before any assertion about
 * the contents is made, so a failing check doesn't leak them.
 *
 * Asserts if /proc/modules can't be opened, if no "i915 " line is found or if
 * that line can't be parsed.
 */
static unsigned
read_i915_module_ref(void)
{
	FILE *fp = fopen("/proc/modules", "r");
	char *line = NULL;
	size_t line_buf_size = 0;
	unsigned ref_count = 0;
	bool found = false;
	int n_parsed = 0;

	igt_assert(fp);

	/* getline() returns -1 at end of file or on error */
	while (getline(&line, &line_buf_size, fp) > 0) {
		unsigned long mem;

		if (strncmp(line, "i915 ", 5) != 0)
			continue;

		n_parsed = sscanf(line + 5, "%lu %u", &mem, &ref_count);
		found = true;
		break;
	}

	free(line);
	fclose(fp);

	igt_assert_f(found, "no i915 entry found in /proc/modules\n");
	igt_assert_f(n_parsed == 2,
		     "failed to parse the i915 entry of /proc/modules\n");

	return ref_count;
}
2191
2192/* check that an open i915 perf stream holds a reference on the drm i915 module
2193 * including in the corner case where the original drm fd has been closed.
2194 */
2195static void
2196test_i915_ref_count(void)
2197{
2198 int oa_exponent = 13; /* 1 millisecond */
2199 uint64_t properties[] = {
2200 /* Include OA reports in samples */
2201 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2202
2203 /* OA unit configuration */
2204 DRM_I915_PERF_PROP_OA_METRICS_SET, 0 /* updated below */,
2205 DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
2206 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2207 };
2208 struct drm_i915_perf_open_param param = {
2209 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2210 .num_properties = sizeof(properties) / 16,
Robert Fossdbee0832016-12-19 05:19:10 -05002211 .properties_ptr = to_user_pointer(properties),
Robert Braggbb7ea5b2016-02-09 19:15:45 +00002212 };
2213 unsigned baseline, ref_count0, ref_count1;
2214 int stream_fd;
2215 uint32_t oa_report0[64];
2216 uint32_t oa_report1[64];
2217
2218 /* This should be the first test before the first fixture so no drm_fd
2219 * should have been opened so far...
2220 */
2221 igt_assert_eq(drm_fd, -1);
2222
2223 baseline = read_i915_module_ref();
2224 igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
2225
2226 drm_fd = __drm_open_driver(DRIVER_INTEL);
2227 devid = intel_get_drm_devid(drm_fd);
2228 device = drm_get_card();
2229
2230 igt_require(IS_HASWELL(devid));
2231 igt_require(lookup_hsw_render_basic_id());
2232 properties[3] = hsw_render_basic_id;
2233
2234 ref_count0 = read_i915_module_ref();
2235 igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
2236 igt_assert(ref_count0 > baseline);
2237
2238 stream_fd = __perf_open(drm_fd, &param);
2239 ref_count1 = read_i915_module_ref();
2240 igt_debug("ref count after opening i915 perf stream = %u\n", ref_count1);
2241 igt_assert(ref_count1 > ref_count0);
2242
2243 close(drm_fd);
2244 drm_fd = -1;
2245 ref_count0 = read_i915_module_ref();
2246 igt_debug("ref count after closing drm fd = %u\n", ref_count0);
2247
2248 igt_assert(ref_count0 > baseline);
2249
2250 read_2_oa_reports(stream_fd,
2251 I915_OA_FORMAT_A45_B8_C8,
2252 oa_exponent,
2253 oa_report0,
2254 oa_report1,
2255 false); /* not just timer reports */
2256
2257 close(stream_fd);
2258 ref_count0 = read_i915_module_ref();
2259 igt_debug("ref count after closing i915 perf stream fd = %u\n", ref_count0);
2260 igt_assert_eq(ref_count0, baseline);
2261}
2262
/* Check the current values of the two i915 perf sysctl files:
 * perf_stream_paranoid is expected to read 1 and oa_max_sample_rate is
 * expected to read 100000.
 */
static void
test_sysctl_defaults(void)
{
	int paranoid;
	int max_freq;

	paranoid = read_u64_file("/proc/sys/dev/i915/perf_stream_paranoid");
	max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");

	igt_assert_eq(paranoid, 1);
	igt_assert_eq(max_freq, 100000);
}
2272
2273igt_main
2274{
2275 igt_skip_on_simulation();
2276
2277 igt_fixture {
2278 struct stat sb;
2279
2280 igt_require(stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb)
2281 == 0);
2282 igt_require(stat("/proc/sys/dev/i915/oa_max_sample_rate", &sb)
2283 == 0);
2284 }
2285
2286 igt_subtest("i915-ref-count")
2287 test_i915_ref_count();
2288
2289 igt_subtest("sysctl-defaults")
2290 test_sysctl_defaults();
2291
2292 igt_fixture {
2293 /* We expect that the ref count test before these fixtures
2294 * should have closed drm_fd...
2295 */
2296 igt_assert_eq(drm_fd, -1);
2297 drm_fd = drm_open_driver_render(DRIVER_INTEL);
2298 devid = intel_get_drm_devid(drm_fd);
2299 device = drm_get_card();
2300
2301 igt_require(IS_HASWELL(devid));
2302 igt_require(lookup_hsw_render_basic_id());
2303
2304 gt_frequency_range_save();
2305
2306 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
2307 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
2308
2309 render_copy = igt_get_render_copyfunc(devid);
2310 igt_require_f(render_copy, "no render-copy function\n");
2311 }
2312
2313 igt_subtest("non-system-wide-paranoid")
2314 test_system_wide_paranoid();
2315
2316 igt_subtest("invalid-open-flags")
2317 test_invalid_open_flags();
2318
2319 igt_subtest("invalid-oa-metric-set-id")
2320 test_invalid_oa_metric_set_id();
2321
2322 igt_subtest("invalid-oa-format-id")
2323 test_invalid_oa_format_id();
2324
2325 igt_subtest("missing-sample-flags")
2326 test_missing_sample_flags();
2327
2328 igt_subtest("oa-formats")
2329 test_oa_formats();
2330
2331 igt_subtest("invalid-oa-exponent")
2332 test_invalid_oa_exponent();
2333 igt_subtest("low-oa-exponent-permissions")
2334 test_low_oa_exponent_permissions();
2335 igt_subtest("oa-exponents") {
2336 test_oa_exponents(450);
2337 test_oa_exponents(550);
2338 }
2339
2340 igt_subtest("per-context-mode-unprivileged")
2341 test_per_context_mode_unprivileged();
2342
2343 igt_subtest("buffer-fill")
2344 test_buffer_fill();
2345
2346 igt_subtest("disabled-read-error")
2347 test_disabled_read_error();
2348 igt_subtest("non-sampling-read-error")
2349 test_non_sampling_read_error();
2350
2351 igt_subtest("enable-disable")
2352 test_enable_disable();
2353
2354 igt_subtest("blocking")
2355 test_blocking();
2356
2357 igt_subtest("polling")
2358 test_polling();
2359
2360 igt_subtest("short-reads")
2361 test_short_reads();
2362
2363 igt_subtest("mi-rpc")
2364 test_mi_rpc();
2365
2366 igt_subtest("mi-rpc-per-ctx")
2367 test_per_ctx_mi_rpc();
2368
2369 igt_subtest("rc6-disable")
2370 test_rc6_disable();
2371
2372 igt_fixture {
2373 /* leave sysctl options in their default state... */
2374 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
2375 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
2376
2377 gt_frequency_range_restore();
2378
2379 close(drm_fd);
2380 }
2381}