blob: 247e8934fedc8a62f93fa801d70fb2568528244d [file] [log] [blame]
Chris Wilson93b8ad82016-08-28 16:45:22 +01001/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#include <unistd.h>
29#include <stdlib.h>
30#include <stdint.h>
31#include <stdio.h>
32#include <string.h>
33#include <fcntl.h>
34#include <inttypes.h>
35#include <errno.h>
36#include <sys/stat.h>
37#include <sys/poll.h>
38#include <sys/ioctl.h>
39#include <sys/time.h>
40#include <time.h>
41
42#include "drm.h"
43#include "ioctl_wrappers.h"
44#include "drmtest.h"
45#include "intel_chipset.h"
46#include "igt_stats.h"
47
/*
 * Local definitions of execbuf flag bits, so the benchmark does not depend
 * on the installed headers providing them.
 */
#define LOCAL_I915_EXEC_NO_RELOC (1 << 11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1 << 12)

#define LOCAL_I915_EXEC_BSD_SHIFT (13)
#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)

/* Every execbuf flag bit that takes part in selecting an engine. */
#define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

/*
 * Benchmark mode bits, OR-ed together into the 'flags' argument of loop().
 * The letter in brackets is the command-line switch that sets the bit.
 */
#define WRITE (1 << 0)		/* [-W] mark the target object as written */
#define IDLE (1 << 1)		/* [-I] do not submit any work before polling */
#define DMABUF (1 << 2)		/* [-d] poll() an exported dma-buf fd */
#define WAIT (1 << 3)		/* [-w] query through the GEM_WAIT ioctl */
#define SYNC (1 << 4)		/* [-s] poll() a fence fd returned by execbuf */
#define SYNCOBJ (1 << 5)	/* [-S] query through the syncobj wait ioctl */
62
/*
 * Execbuf flag set by loop() when it passes an array of
 * struct local_gem_exec_fence through the cliprects_ptr / num_cliprects
 * fields of the execbuffer.
 */
#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)

/* Values for local_gem_exec_fence.flags. */
#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)

/* One entry of the execbuf fence array. */
struct local_gem_exec_fence {
	uint32_t handle;	/* syncobj handle */
	uint32_t flags;		/* LOCAL_EXEC_FENCE_* */
};
Chris Wilson93b8ad82016-08-28 16:45:22 +010070
Chris Wilson43baec12017-06-05 14:01:40 +010071static void gem_busy(int fd, uint32_t handle)
Chris Wilson93b8ad82016-08-28 16:45:22 +010072{
Chris Wilson43baec12017-06-05 14:01:40 +010073 struct drm_i915_gem_busy busy = { .handle = handle };
74 ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
Chris Wilson93b8ad82016-08-28 16:45:22 +010075}
76
Chris Wilson43baec12017-06-05 14:01:40 +010077static void gem_wait__busy(int fd, uint32_t handle)
Chris Wilson572a7702016-08-28 20:43:41 +010078{
Chris Wilson43baec12017-06-05 14:01:40 +010079 struct drm_i915_gem_wait wait = { .bo_handle = handle };
80 ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
Chris Wilson572a7702016-08-28 20:43:41 +010081}
82
/*
 * Time from @start to @end, in nanoseconds.
 *
 * The result is negative when @end precedes @start.
 */
static double elapsed(const struct timespec *start,
		      const struct timespec *end)
{
	const double whole_seconds = end->tv_sec - start->tv_sec;
	const double nanoseconds = end->tv_nsec - start->tv_nsec;

	return 1e9 * whole_seconds + nanoseconds;
}
89
/*
 * Argument block for SYNC_IOC_MERGE, declared locally.
 * NOTE(review): presumably mirrors the kernel's sync_file uAPI - confirm
 * the layout against <linux/sync_file.h>.
 */
struct sync_merge_data {
	char name[32];	/* label given to the merged fence */
	__s32 fd2;	/* in: the second fence fd to merge */
	__s32 fence;	/* out: fd of the merged fence */
	__u32 flags;
	__u32 pad;
};

#define SYNC_IOC_MAGIC '>'
#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
100
101static int sync_merge(int fd1, int fd2)
102{
103 struct sync_merge_data data;
104
105 if (fd1 == -1)
106 return dup(fd2);
107
108 if (fd2 == -1)
109 return dup(fd1);
110
111 memset(&data, 0, sizeof(data));
112 data.fd2 = fd2;
113 strcpy(data.name, "i965");
114
115 if (ioctl(fd1, SYNC_IOC_MERGE, &data))
116 return -errno;
117
118 return data.fence;
119}
120
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100121static uint32_t __syncobj_create(int fd)
122{
123 struct local_syncobj_create {
124 uint32_t handle, flags;
125 } arg;
126#define LOCAL_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct local_syncobj_create)
127
128 memset(&arg, 0, sizeof(arg));
129 ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
130
131 return arg.handle;
132}
133
/*
 * Create a syncobj on @fd and return its handle.
 *
 * A zero handle from __syncobj_create() trips the igt assertion, so a
 * handle returned from here is never 0.
 */
static uint32_t syncobj_create(int fd)
{
	uint32_t ret;

	/* Expression kept as-is: the assert macro may echo it on failure. */
	igt_assert_neq((ret = __syncobj_create(fd)), 0);
	return ret;
}
142
/* Values for local_syncobj_wait.flags. */
#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)

/* Argument block for LOCAL_IOCTL_SYNCOBJ_WAIT, declared locally. */
struct local_syncobj_wait {
	__u64 handles;		/* user pointer to an array of syncobj handles */
	__s64 timeout_nsec;	/* absolute timeout */
	__u32 count_handles;	/* number of entries behind 'handles' */
	__u32 flags;		/* LOCAL_SYNCOBJ_WAIT_FLAGS_* */
	__u32 first_signaled;	/* only valid when not waiting all */
	__u32 pad;
};
#define LOCAL_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct local_syncobj_wait)
155static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
156{
157 int err = 0;
158 if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
159 err = -errno;
160 return err;
161}
162
Chris Wilson93b8ad82016-08-28 16:45:22 +0100163static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
164{
165 struct drm_i915_gem_execbuffer2 execbuf;
166 struct drm_i915_gem_exec_object2 obj[2];
167 struct drm_i915_gem_relocation_entry reloc[2];
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100168 struct local_gem_exec_fence syncobj;
Chris Wilson93b8ad82016-08-28 16:45:22 +0100169 unsigned engines[16];
170 unsigned nengine;
171 uint32_t *batch;
172 double *shared;
173 int fd, i, gen;
174 int dmabuf;
175
176 shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
177
178 fd = drm_open_driver(DRIVER_INTEL);
179 gen = intel_gen(intel_get_drm_devid(fd));
180
181 memset(obj, 0, sizeof(obj));
182 obj[0].handle = gem_create(fd, 4096);
183 if (flags & WRITE)
184 obj[0].flags = EXEC_OBJECT_WRITE;
185 obj[1].handle = gem_create(fd, 4096);
186 if (gem_mmap__has_wc(fd))
187 batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);
188 else
189 batch = gem_mmap__gtt(fd, obj[1].handle, 4096, PROT_WRITE);
190 gem_set_domain(fd, obj[1].handle,
191 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
192 batch[0] = MI_BATCH_BUFFER_END;
193
194 memset(&execbuf, 0, sizeof(execbuf));
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100195 execbuf.buffers_ptr = to_user_pointer(obj);
Chris Wilson93b8ad82016-08-28 16:45:22 +0100196 execbuf.buffer_count = 2;
197 execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
198 execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
199 if (__gem_execbuf(fd, &execbuf)) {
200 execbuf.flags = 0;
201 if (__gem_execbuf(fd, &execbuf))
202 return 77;
203 }
204
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100205 if (flags & SYNCOBJ) {
206 syncobj.handle = syncobj_create(fd);
207 syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
208
209 execbuf.cliprects_ptr = to_user_pointer(&syncobj);
210 execbuf.num_cliprects = 1;
211 execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
212 }
213
Chris Wilson93b8ad82016-08-28 16:45:22 +0100214 if (ring == -1) {
215 nengine = 0;
216 for (ring = 1; ring < 16; ring++) {
217 execbuf.flags &= ~ENGINE_FLAGS;
218 execbuf.flags |= ring;
219 if (__gem_execbuf(fd, &execbuf) == 0)
220 engines[nengine++] = ring;
221 }
222 } else {
223 nengine = 1;
224 engines[0] = ring;
225 }
226
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100227 obj[1].relocs_ptr = to_user_pointer(reloc);
Chris Wilson93b8ad82016-08-28 16:45:22 +0100228 obj[1].relocation_count = 2;
229
230 if (flags & DMABUF)
231 dmabuf = prime_handle_to_fd(fd, obj[0].handle);
232
233 gem_set_domain(fd, obj[1].handle,
234 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
235
236 reloc[0].target_handle = obj[1].handle; /* recurse */
237 reloc[0].presumed_offset = obj[1].offset;
238 reloc[0].offset = sizeof(uint32_t);
239 reloc[0].delta = 0;
240 if (gen < 4)
241 reloc[0].delta = 1;
242 reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
243 reloc[0].write_domain = 0;
244
245 reloc[1].target_handle = obj[0].handle;
246 reloc[1].presumed_offset = obj[0].offset;
247 reloc[1].offset = 1024;
248 reloc[1].delta = 0;
249 reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
250 reloc[1].write_domain = 0;
251 if (flags & WRITE)
252 reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
253
254 while (reps--) {
Chris Wilson96e1eac2016-08-29 19:16:41 +0100255 int fence = -1;
Chris Wilson93b8ad82016-08-28 16:45:22 +0100256 memset(shared, 0, 4096);
257
258 gem_set_domain(fd, obj[1].handle,
259 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
260 sleep(1); /* wait for the hw to go back to sleep */
261 batch[i = 0] = MI_BATCH_BUFFER_START;
262 if (gen >= 8) {
263 batch[i] |= 1 << 8 | 1;
264 batch[++i] = obj[1].offset;
265 batch[++i] = obj[1].offset >> 32;
266 } else if (gen >= 6) {
267 batch[i] |= 1 << 8;
268 batch[++i] = obj[1].offset;
269 } else {
270 batch[i] |= 2 << 6;
271 batch[++i] = obj[1].offset;
272 if (gen < 4)
273 batch[i] |= 1;
274 }
275
276 if ((flags & IDLE) == 0) {
277 for (int n = 0; n < nengine; n++) {
Chris Wilson96e1eac2016-08-29 19:16:41 +0100278 execbuf.flags &= ~(3 << 16);
Chris Wilson6bd42082016-09-04 20:00:09 +0100279 if (flags & SYNC)
Chris Wilson96e1eac2016-08-29 19:16:41 +0100280 execbuf.flags |= 1 << 17;
Chris Wilson93b8ad82016-08-28 16:45:22 +0100281 execbuf.flags &= ~ENGINE_FLAGS;
282 execbuf.flags |= engines[n];
Chris Wilson96e1eac2016-08-29 19:16:41 +0100283 gem_execbuf_wr(fd, &execbuf);
284 if (execbuf.flags & (1 << 17))
Chris Wilson6bd42082016-09-04 20:00:09 +0100285 fence = sync_merge(fence, execbuf.rsvd2 >> 32);
Chris Wilson93b8ad82016-08-28 16:45:22 +0100286 }
287 }
288
289 igt_fork(child, ncpus) {
290 struct timespec start, end;
291 unsigned count = 0;
292
293 clock_gettime(CLOCK_MONOTONIC, &start);
294 do {
295 if (flags & DMABUF) {
296 struct pollfd pfd = { .fd = dmabuf, .events = POLLOUT };
297 for (int inner = 0; inner < 1024; inner++)
298 poll(&pfd, 1, 0);
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100299 } else if (flags & SYNCOBJ) {
300 struct local_syncobj_wait arg = {
301 .handles = to_user_pointer(&syncobj.handle),
302 .count_handles = 1,
303 };
304
305 for (int inner = 0; inner < 1024; inner++)
306 __syncobj_wait(fd, &arg);
Chris Wilson96e1eac2016-08-29 19:16:41 +0100307 } else if (flags & SYNC) {
308 struct pollfd pfd = { .fd = fence, .events = POLLOUT };
309 for (int inner = 0; inner < 1024; inner++)
310 poll(&pfd, 1, 0);
Chris Wilson572a7702016-08-28 20:43:41 +0100311 } else if (flags & WAIT) {
312 for (int inner = 0; inner < 1024; inner++)
313 gem_wait__busy(fd, obj[0].handle);
Chris Wilson93b8ad82016-08-28 16:45:22 +0100314 } else {
315 for (int inner = 0; inner < 1024; inner++)
316 gem_busy(fd, obj[0].handle);
317 }
318
319 clock_gettime(CLOCK_MONOTONIC, &end);
320 count += 1024;
321 } while (elapsed(&start, &end) < 2e9);
322
323 clock_gettime(CLOCK_MONOTONIC, &end);
324 shared[child] = elapsed(&start, &end) / count;
325 }
326 igt_waitchildren();
327
328 batch[0] = MI_BATCH_BUFFER_END;
Chris Wilson96e1eac2016-08-29 19:16:41 +0100329 if (fence != -1)
330 close(fence);
Chris Wilson93b8ad82016-08-28 16:45:22 +0100331
332 for (int child = 0; child < ncpus; child++)
333 shared[ncpus] += shared[child];
334 printf("%7.3f\n", shared[ncpus] / ncpus);
335 }
336 return 0;
337}
338
339int main(int argc, char **argv)
340{
341 unsigned ring = I915_EXEC_RENDER;
342 unsigned flags = 0;
343 int reps = 1;
344 int ncpus = 1;
345 int c;
346
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100347 while ((c = getopt (argc, argv, "e:r:dfsSwWI")) != -1) {
Chris Wilson93b8ad82016-08-28 16:45:22 +0100348 switch (c) {
349 case 'e':
350 if (strcmp(optarg, "rcs") == 0)
351 ring = I915_EXEC_RENDER;
352 else if (strcmp(optarg, "vcs") == 0)
353 ring = I915_EXEC_BSD;
354 else if (strcmp(optarg, "bcs") == 0)
355 ring = I915_EXEC_BLT;
356 else if (strcmp(optarg, "vecs") == 0)
357 ring = I915_EXEC_VEBOX;
358 else if (strcmp(optarg, "all") == 0)
359 ring = -1;
360 else
361 ring = atoi(optarg);
362 break;
363
364 case 'r':
365 reps = atoi(optarg);
366 if (reps < 1)
367 reps = 1;
368 break;
369
370 case 'f':
371 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
372 break;
373
374 case 'd':
375 flags |= DMABUF;
376 break;
377
Chris Wilson572a7702016-08-28 20:43:41 +0100378 case 'w':
379 flags |= WAIT;
380 break;
381
Chris Wilson96e1eac2016-08-29 19:16:41 +0100382 case 's':
383 flags |= SYNC;
384 break;
385
Chris Wilsonf0bfbad2017-08-10 19:41:53 +0100386 case 'S':
387 flags |= SYNCOBJ;
388 break;
389
Chris Wilson93b8ad82016-08-28 16:45:22 +0100390 case 'W':
391 flags |= WRITE;
392 break;
393
394 case 'I':
395 flags |= IDLE;
396 break;
397 default:
398 break;
399 }
400 }
401
402 return loop(ring, reps, ncpus, flags);
403}