/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

25#include <unistd.h>
26#include <stdlib.h>
27#include <stdint.h>
28#include <stdio.h>
29#include <string.h>
30#include <fcntl.h>
31#include <inttypes.h>
32#include <errno.h>
33#include <poll.h>
34#include <sys/stat.h>
35#include <sys/types.h>
36#include <sys/ioctl.h>
37#include <sys/time.h>
38#include <sys/wait.h>
39#include <time.h>
40#include <assert.h>
41#include <limits.h>
Tvrtko Ursulin255d1fc2017-05-19 15:13:48 +010042#include <pthread.h>
Tvrtko Ursulin054eb1a2017-03-30 14:32:29 +010043
44
45#include "intel_chipset.h"
46#include "drm.h"
47#include "ioctl_wrappers.h"
48#include "drmtest.h"
Chris Wilson5be05632017-05-09 10:53:39 +010049
Tvrtko Ursulin054eb1a2017-03-30 14:32:29 +010050#include "intel_io.h"
Chris Wilson5be05632017-05-09 10:53:39 +010051#include "igt_aux.h"
Tvrtko Ursulin054eb1a2017-03-30 14:32:29 +010052#include "igt_rand.h"
Tvrtko Ursulin987abfb2017-05-16 13:49:21 +010053#include "sw_sync.h"
Tvrtko Ursulin054eb1a2017-03-30 14:32:29 +010054
Chris Wilson9e55cca2017-04-25 15:12:50 +010055#include "ewma.h"
56
#define LOCAL_I915_EXEC_FENCE_IN (1 << 16)
#define LOCAL_I915_EXEC_FENCE_OUT (1 << 17)

enum intel_engine_id {
	RCS,
	BCS,
	VCS,
	VCS1,
	VCS2,
	VECS,
	NUM_ENGINES
};

struct duration {
	unsigned int min, max;
};

enum w_type
{
	BATCH,
	SYNC,
	DELAY,
	PERIOD,
	THROTTLE,
	QD_THROTTLE,
	SW_FENCE,
	SW_FENCE_SIGNAL
};

struct deps
{
	int nr;
	int *list;
};

struct w_arg {
	char *filename;
	char *desc;
	int prio;
};

struct w_step
{
	/* Workload step metadata */
	enum w_type type;
	unsigned int context;
	unsigned int engine;
	struct duration duration;
	struct deps data_deps;
	struct deps fence_deps;
	int emit_fence;
	union {
		int sync;
		int delay;
		int period;
		int target;
		int throttle;
		int fence_signal;
	};

	/* Implementation details */
	unsigned int idx;
	struct igt_list rq_link;
	unsigned int request;

	struct drm_i915_gem_execbuffer2 eb;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_relocation_entry reloc[4];
	unsigned long bb_sz;
	uint32_t bb_handle;
	uint32_t *mapped_batch;
	uint32_t *seqno_value;
	uint32_t *seqno_address;
	uint32_t *rt0_value;
	uint32_t *rt0_address;
	uint32_t *rt1_address;
	uint32_t *latch_value;
	uint32_t *latch_address;
	unsigned int mapped_len;
};

DECLARE_EWMA(uint64_t, rt, 4, 2)

struct workload
{
	unsigned int id;

	unsigned int nr_steps;
	struct w_step *steps;
	int prio;

	pthread_t thread;
	bool run;
	bool background;
	const struct workload_balancer *balancer;
	unsigned int repeat;
	unsigned int flags;
	bool print_stats;

	uint32_t prng;

	struct timespec repeat_start;

	unsigned int nr_ctxs;
	struct {
		uint32_t id;
		unsigned int static_vcs;
	} *ctx_list;

	int sync_timeline;
	uint32_t sync_seqno;

	uint32_t seqno[NUM_ENGINES];
	struct drm_i915_gem_exec_object2 status_object[2];
	uint32_t *status_page;
	uint32_t *status_cs;
	unsigned int vcs_rr;

	unsigned long qd_sum[NUM_ENGINES];
	unsigned long nr_bb[NUM_ENGINES];

	struct igt_list requests[NUM_ENGINES];
	unsigned int nrequest[NUM_ENGINES];

	struct workload *global_wrk;
	const struct workload_balancer *global_balancer;
	pthread_mutex_t mutex;

	union {
		struct rtavg {
			struct ewma_rt avg[NUM_ENGINES];
			uint32_t last[NUM_ENGINES];
		} rt;
	};
};

static const unsigned int nop_calibration_us = 1000;
static unsigned long nop_calibration;

static unsigned int context_vcs_rr;

static int verbose = 1;
static int fd;

#define SWAPVCS		(1 << 0)
#define SEQNO		(1 << 1)
#define BALANCE		(1 << 2)
#define RT		(1 << 3)
#define VCS2REMAP	(1 << 4)
#define INITVCSRR	(1 << 5)
#define SYNCEDCLIENTS	(1 << 6)
#define HEARTBEAT	(1 << 7)
#define GLOBAL_BALANCE	(1 << 8)
#define DEPSYNC		(1 << 9)

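/*
 * Layout of the per-engine slots in the status page, 16 dwords apart (as
 * used by update_bb_seqno()/update_bb_rt() and get_rt_depth() below):
 * dword 0 holds the last breadcrumb seqno, dword 1 the submission
 * timestamp, dword 2 the completion timestamp and dword 3 a latch copy
 * of the seqno used to detect torn reads.
 */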
#define SEQNO_IDX(engine) ((engine) * 16)
#define SEQNO_OFFSET(engine) (SEQNO_IDX(engine) * sizeof(uint32_t))

#define RCS_TIMESTAMP (0x2000 + 0x358)
#define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x)

static const char *ring_str_map[NUM_ENGINES] = {
	[RCS] = "RCS",
	[BCS] = "BCS",
	[VCS] = "VCS",
	[VCS1] = "VCS1",
	[VCS2] = "VCS2",
	[VECS] = "VECS",
};

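/*
 * Dependency lists are "/"-separated relative step indices, which must be
 * negative (pointing back at an earlier step). A plain number is a data
 * (batch buffer) dependency, while an "f" prefix, e.g. "f-1", makes it a
 * submit fence dependency. "0" means no dependency.
 */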
static int
parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
{
	char *desc = strdup(_desc);
	char *token, *tctx = NULL, *tstart = desc;

	igt_assert(desc);
	igt_assert(!w->data_deps.nr && w->data_deps.nr == w->fence_deps.nr);
	igt_assert(!w->data_deps.list &&
		   w->data_deps.list == w->fence_deps.list);

	while ((token = strtok_r(tstart, "/", &tctx)) != NULL) {
		char *str = token;
		struct deps *deps;
		int dep;

		tstart = NULL;

		if (strlen(token) > 1 && token[0] == 'f') {
			deps = &w->fence_deps;
			str++;
		} else {
			deps = &w->data_deps;
		}

		dep = atoi(str);
		if (dep > 0 || ((int)nr_steps + dep) < 0) {
			if (deps->list)
				free(deps->list);
			return -1;
		}

		if (dep < 0) {
			deps->nr++;
			/* Multiple fences not yet supported. */
			igt_assert(deps->nr == 1 || deps != &w->fence_deps);
			deps->list = realloc(deps->list,
					     sizeof(*deps->list) * deps->nr);
			igt_assert(deps->list);
			deps->list[deps->nr - 1] = dep;
		}
	}

	free(desc);

	return 0;
}

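/*
 * A workload is a comma-separated list of steps. A batch step has five
 * dot-separated fields: context, engine, duration in microseconds (or a
 * "min-max" range), dependencies and a sync boolean. Special steps use a
 * letter prefix instead: d.N (delay), p.N (period), s.N (sync to step),
 * t.N (throttle), q.N (queue-depth throttle), a.N (sw fence signal) and
 * f (sw fence). For example (an illustrative string, not from the
 * original sources):
 *
 *   "1.VCS1.3000.0.0,1.RCS.500-1500.-1.1"
 *
 * runs a 3ms batch on VCS1 from context 1, then a 0.5-1.5ms batch on RCS
 * with a data dependency on the previous step, waited upon on completion.
 */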
static struct workload *
parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
{
	struct workload *wrk;
	unsigned int nr_steps = 0;
	char *desc = strdup(arg->desc);
	char *_token, *token, *tctx = NULL, *tstart = desc;
	char *field, *fctx = NULL, *fstart;
	struct w_step step, *steps = NULL;
	bool bcs_used = false;
	unsigned int valid;
	int i, j, tmp;

	igt_assert(desc);

	while ((_token = strtok_r(tstart, ",", &tctx)) != NULL) {
		tstart = NULL;
		token = strdup(_token);
		igt_assert(token);
		fstart = token;
		valid = 0;
		memset(&step, 0, sizeof(step));

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			if (!strcasecmp(field, "d")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp <= 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid delay at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = DELAY;
					step.delay = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "p")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp <= 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid period at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = PERIOD;
					step.period = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "s")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp >= 0 ||
					    ((int)nr_steps + tmp) < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid sync target at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = SYNC;
					step.target = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "t")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid throttle at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = THROTTLE;
					step.throttle = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "q")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid qd throttle at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = QD_THROTTLE;
					step.throttle = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "a")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp >= 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid sw fence signal at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = SW_FENCE_SIGNAL;
					step.target = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "f")) {
				step.type = SW_FENCE;
				goto add_step;
			}

			tmp = atoi(field);
			if (tmp < 0) {
				if (verbose)
					fprintf(stderr,
						"Invalid ctx id at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.context = tmp;

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			unsigned int old_valid = valid;

			fstart = NULL;

			for (i = 0; i < ARRAY_SIZE(ring_str_map); i++) {
				if (!strcasecmp(field, ring_str_map[i])) {
					step.engine = i;
					if (step.engine == BCS)
						bcs_used = true;
					valid++;
					break;
				}
			}

			if (old_valid == valid) {
				if (verbose)
					fprintf(stderr,
						"Invalid engine id at step %u!\n",
						nr_steps);
				return NULL;
			}
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			char *sep = NULL;
			long int tmpl;

			fstart = NULL;

			tmpl = strtol(field, &sep, 10);
			if (tmpl <= 0 || tmpl == LONG_MIN || tmpl == LONG_MAX) {
				if (verbose)
					fprintf(stderr,
						"Invalid duration at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.duration.min = tmpl;

			if (sep && *sep == '-') {
				tmpl = strtol(sep + 1, NULL, 10);
				if (tmpl <= 0 || tmpl <= step.duration.min ||
				    tmpl == LONG_MIN || tmpl == LONG_MAX) {
					if (verbose)
						fprintf(stderr,
							"Invalid duration range at step %u!\n",
							nr_steps);
					return NULL;
				}
				step.duration.max = tmpl;
			} else {
				step.duration.max = step.duration.min;
			}

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			tmp = parse_dependencies(nr_steps, &step, field);
			if (tmp < 0) {
				if (verbose)
					fprintf(stderr,
						"Invalid dependency at step %u!\n",
						nr_steps);
				return NULL;
			}

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			if (strlen(field) != 1 ||
			    (field[0] != '0' && field[0] != '1')) {
				if (verbose)
					fprintf(stderr,
						"Invalid wait boolean at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.sync = field[0] - '0';

			valid++;
		}

		if (valid != 5) {
			if (verbose)
				fprintf(stderr, "Invalid record at step %u!\n",
					nr_steps);
			return NULL;
		}

		step.type = BATCH;

add_step:
		step.idx = nr_steps++;
		step.request = -1;
		steps = realloc(steps, sizeof(step) * nr_steps);
		igt_assert(steps);

		memcpy(&steps[nr_steps - 1], &step, sizeof(step));

		free(token);
	}

	if (app_w) {
		steps = realloc(steps, sizeof(step) *
				(nr_steps + app_w->nr_steps));
		igt_assert(steps);

		memcpy(&steps[nr_steps], app_w->steps,
		       sizeof(step) * app_w->nr_steps);

		for (i = 0; i < app_w->nr_steps; i++)
			steps[nr_steps + i].idx += nr_steps;

		nr_steps += app_w->nr_steps;
	}

	wrk = malloc(sizeof(*wrk));
	igt_assert(wrk);

	wrk->nr_steps = nr_steps;
	wrk->steps = steps;
	wrk->prio = arg->prio;

	free(desc);

	/*
	 * Tag all steps which need to emit a sync fence if another step is
	 * referencing them as a sync fence dependency.
	 */
	for (i = 0; i < nr_steps; i++) {
		for (j = 0; j < steps[i].fence_deps.nr; j++) {
			tmp = steps[i].idx + steps[i].fence_deps.list[j];
			if (tmp < 0 || tmp >= i ||
			    (steps[tmp].type != BATCH &&
			     steps[tmp].type != SW_FENCE)) {
				if (verbose)
					fprintf(stderr,
						"Invalid dependency target %u!\n",
						i);
				return NULL;
			}
			steps[tmp].emit_fence = -1;
		}
	}

	/* Validate SW_FENCE_SIGNAL targets. */
	for (i = 0; i < nr_steps; i++) {
		if (steps[i].type == SW_FENCE_SIGNAL) {
			tmp = steps[i].idx + steps[i].target;
			if (tmp < 0 || tmp >= i ||
			    steps[tmp].type != SW_FENCE) {
				if (verbose)
					fprintf(stderr,
						"Invalid sw fence target %u!\n",
						i);
				return NULL;
			}
		}
	}

	if (bcs_used && verbose)
		printf("BCS usage in workload with VCS2 remapping enabled!\n");

	return wrk;
}

static struct workload *
clone_workload(struct workload *_wrk)
{
	struct workload *wrk;
	int i;

	wrk = malloc(sizeof(*wrk));
	igt_assert(wrk);
	memset(wrk, 0, sizeof(*wrk));

	wrk->prio = _wrk->prio;
	wrk->nr_steps = _wrk->nr_steps;
	wrk->steps = calloc(wrk->nr_steps, sizeof(struct w_step));
	igt_assert(wrk->steps);

	memcpy(wrk->steps, _wrk->steps, sizeof(struct w_step) * wrk->nr_steps);

	/* Check if we need a sw sync timeline. */
	for (i = 0; i < wrk->nr_steps; i++) {
		if (wrk->steps[i].type == SW_FENCE) {
			wrk->sync_timeline = sw_sync_timeline_create();
			igt_assert(wrk->sync_timeline >= 0);
			break;
		}
	}

	for (i = 0; i < NUM_ENGINES; i++)
		igt_list_init(&wrk->requests[i]);

	return wrk;
}

#define rounddown(x, y) (x - (x % y))
#ifndef PAGE_SIZE
#define PAGE_SIZE (4096)
#endif

static unsigned int get_duration(struct w_step *w)
{
	struct duration *dur = &w->duration;

	if (dur->min == dur->max)
		return dur->min;
	else
		return dur->min + hars_petruska_f54_1_random_unsafe() %
		       (dur->max + 1 - dur->min);
}

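/*
 * Convert a requested duration in microseconds into a batch buffer size:
 * nop_calibration NOP dwords take nop_calibration_us (1000) microseconds
 * to execute, so a batch of N microseconds needs roughly
 * N * nop_calibration / 1000 dwords. For example, assuming an
 * illustrative calibration of 5000 nops per millisecond, a 2000us batch
 * works out to 10000 dwords, i.e. 40000 bytes.
 */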
static unsigned long get_bb_sz(unsigned int duration)
{
	return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
		     nop_calibration_us, sizeof(uint32_t));
}

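/*
 * Map the tail of the batch (write-combined) and replace the trailing
 * nops with the optional SEQNO and RT store instructions, terminated by
 * a batch buffer end. Pointers into the mapping are kept so the stored
 * values can be updated cheaply before every submission.
 */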
static void
terminate_bb(struct w_step *w, unsigned int flags)
{
	const uint32_t bbe = 0xa << 23;
	unsigned long mmap_start, mmap_len;
	unsigned long batch_start = w->bb_sz;
	uint32_t *ptr, *cs;

	igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT));

	batch_start -= sizeof(uint32_t); /* bbend */
	if (flags & SEQNO)
		batch_start -= 4 * sizeof(uint32_t);
	if (flags & RT)
		batch_start -= 12 * sizeof(uint32_t);

	mmap_start = rounddown(batch_start, PAGE_SIZE);
	mmap_len = w->bb_sz - mmap_start;

	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);

	if (flags & SEQNO) {
		w->reloc[0].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->seqno_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->seqno_value = cs;
		*cs++ = 0;
	}

	if (flags & RT) {
		w->reloc[1].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->rt0_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->rt0_value = cs;
		*cs++ = 0;

		w->reloc[2].offset = batch_start + 2 * sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
		*cs++ = RCS_TIMESTAMP;
		w->rt1_address = cs;
		*cs++ = 0;
		*cs++ = 0;

		w->reloc[3].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->latch_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->latch_value = cs;
		*cs++ = 0;
	}

	*cs = bbe;

	w->mapped_batch = ptr;
	w->mapped_len = mmap_len;
}

static const unsigned int eb_engine_map[NUM_ENGINES] = {
	[RCS] = I915_EXEC_RENDER,
	[BCS] = I915_EXEC_BLT,
	[VCS] = I915_EXEC_BSD,
	[VCS1] = I915_EXEC_BSD | I915_EXEC_BSD_RING1,
	[VCS2] = I915_EXEC_BSD | I915_EXEC_BSD_RING2,
	[VECS] = I915_EXEC_VEBOX
};

static void
eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
	      enum intel_engine_id engine,
	      unsigned int flags)
{
	if (engine == VCS2 && (flags & VCS2REMAP))
		engine = BCS;

	eb->flags = eb_engine_map[engine];
}

static void
eb_update_flags(struct w_step *w, enum intel_engine_id engine,
		unsigned int flags)
{
	eb_set_engine(&w->eb, engine, flags);

	w->eb.flags |= I915_EXEC_HANDLE_LUT;
	w->eb.flags |= I915_EXEC_NO_RELOC;

	igt_assert(w->emit_fence <= 0);
	if (w->emit_fence)
		w->eb.flags |= LOCAL_I915_EXEC_FENCE_OUT;
}

static struct drm_i915_gem_exec_object2 *
get_status_objects(struct workload *wrk)
{
	if (wrk->flags & GLOBAL_BALANCE)
		return wrk->global_wrk->status_object;
	else
		return wrk->status_object;
}

static void
alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
{
	enum intel_engine_id engine = w->engine;
	unsigned int j = 0;
	unsigned int nr_obj = 3 + w->data_deps.nr;
	unsigned int i;

	w->obj = calloc(nr_obj, sizeof(*w->obj));
	igt_assert(w->obj);

	w->obj[j].handle = gem_create(fd, 4096);
	w->obj[j].flags = EXEC_OBJECT_WRITE;
	j++;
	igt_assert(j < nr_obj);

	if (flags & SEQNO) {
		w->obj[j++] = get_status_objects(wrk)[0];
		igt_assert(j < nr_obj);
	}

	for (i = 0; i < w->data_deps.nr; i++) {
		igt_assert(w->data_deps.list[i] <= 0);
		if (w->data_deps.list[i]) {
			int dep_idx = w->idx + w->data_deps.list[i];

			igt_assert(dep_idx >= 0 && dep_idx < w->idx);
			igt_assert(wrk->steps[dep_idx].type == BATCH);

			w->obj[j].handle = wrk->steps[dep_idx].obj[0].handle;
			j++;
			igt_assert(j < nr_obj);
		}
	}

	w->bb_sz = get_bb_sz(w->duration.max);
	w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz);
	terminate_bb(w, flags);

	if (flags & SEQNO) {
		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
		if (flags & RT)
			w->obj[j].relocation_count = 4;
		else
			w->obj[j].relocation_count = 1;
		for (i = 0; i < w->obj[j].relocation_count; i++)
			w->reloc[i].target_handle = 1;
	}

	w->eb.buffers_ptr = to_user_pointer(w->obj);
	w->eb.buffer_count = j + 1;
	w->eb.rsvd1 = wrk->ctx_list[w->context].id;

	if (flags & SWAPVCS && engine == VCS1)
		engine = VCS2;
	else if (flags & SWAPVCS && engine == VCS2)
		engine = VCS1;
	eb_update_flags(w, engine, flags);
#ifdef DEBUG
	printf("%u: %u:|", w->idx, w->eb.buffer_count);
	for (i = 0; i <= j; i++)
		printf("%x|", w->obj[i].handle);
	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
	       w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
	       wrk->ctx_list[w->context].id);
#endif
}

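/*
 * Instantiate the parsed workload: create GEM contexts (optionally with a
 * scheduler priority via the context priority parameter), allocate the
 * shared status objects used by the SEQNO and RT balancers, and build a
 * batch buffer for every BATCH step.
 */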
static void
prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
{
	unsigned int ctx_vcs = 0;
	int max_ctx = -1;
	struct w_step *w;
	int i;

	wrk->id = id;
	wrk->prng = rand();
	wrk->run = true;

	if (flags & INITVCSRR)
		wrk->vcs_rr = id & 1;

	if (flags & GLOBAL_BALANCE) {
		int ret = pthread_mutex_init(&wrk->mutex, NULL);
		igt_assert(ret == 0);
	}

	if (flags & SEQNO) {
		if (!(flags & GLOBAL_BALANCE) || id == 0) {
			uint32_t handle;

			handle = gem_create(fd, 4096);
			gem_set_caching(fd, handle, I915_CACHING_CACHED);
			wrk->status_object[0].handle = handle;
			wrk->status_page = gem_mmap__cpu(fd, handle, 0, 4096,
							 PROT_READ);

			handle = gem_create(fd, 4096);
			wrk->status_object[1].handle = handle;
			wrk->status_cs = gem_mmap__wc(fd, handle,
						      0, 4096, PROT_WRITE);
		}
	}

	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
		if ((int)w->context > max_ctx) {
			int delta = w->context + 1 - wrk->nr_ctxs;

			wrk->nr_ctxs += delta;
			wrk->ctx_list = realloc(wrk->ctx_list,
						wrk->nr_ctxs *
						sizeof(*wrk->ctx_list));
			memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
			       delta * sizeof(*wrk->ctx_list));

			max_ctx = w->context;
		}

		if (!wrk->ctx_list[w->context].id) {
			struct drm_i915_gem_context_create arg = {};

			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
			igt_assert(arg.ctx_id);

			wrk->ctx_list[w->context].id = arg.ctx_id;

			if (flags & GLOBAL_BALANCE) {
				wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
				context_vcs_rr ^= 1;
			} else {
				wrk->ctx_list[w->context].static_vcs = ctx_vcs;
				ctx_vcs ^= 1;
			}

			if (wrk->prio) {
				struct local_i915_gem_context_param param = {
					.context = arg.ctx_id,
					.param = 0x6,
					.value = wrk->prio,
				};
				gem_context_set_param(fd, &param);
			}
		}
	}

	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
		unsigned int _flags = flags;
		enum intel_engine_id engine = w->engine;

		if (w->type != BATCH)
			continue;

		if (engine == VCS)
			_flags &= ~SWAPVCS;

		alloc_step_batch(wrk, w, _flags);
	}
}

static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return (end->tv_sec - start->tv_sec) +
	       (end->tv_nsec - start->tv_nsec) / 1e9;
}

static int elapsed_us(const struct timespec *start, const struct timespec *end)
{
	return elapsed(start, end) * 1e6;
}

static enum intel_engine_id get_vcs_engine(unsigned int n)
{
	const enum intel_engine_id vcs_engines[2] = { VCS1, VCS2 };

	igt_assert(n < ARRAY_SIZE(vcs_engines));

	return vcs_engines[n];
}

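/*
 * Seqnos are allocated per engine. With GLOBAL_BALANCE all clients share
 * the counters of the nominated global workload, so allocation must take
 * its mutex; readers such as current_seqno() tolerate a racy read.
 */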
static uint32_t new_seqno(struct workload *wrk, enum intel_engine_id engine)
{
	uint32_t seqno;
	int ret;

	if (wrk->flags & GLOBAL_BALANCE) {
		igt_assert(wrk->global_wrk);
		wrk = wrk->global_wrk;

		ret = pthread_mutex_lock(&wrk->mutex);
		igt_assert(ret == 0);
	}

	seqno = ++wrk->seqno[engine];

	if (wrk->flags & GLOBAL_BALANCE) {
		ret = pthread_mutex_unlock(&wrk->mutex);
		igt_assert(ret == 0);
	}

	return seqno;
}

static uint32_t
current_seqno(struct workload *wrk, enum intel_engine_id engine)
{
	if (wrk->flags & GLOBAL_BALANCE)
		return wrk->global_wrk->seqno[engine];
	else
		return wrk->seqno[engine];
}

#define READ_ONCE(x) (*(volatile typeof(x) *)(&(x)))

static uint32_t
read_status_page(struct workload *wrk, unsigned int idx)
{
	if (wrk->flags & GLOBAL_BALANCE)
		return READ_ONCE(wrk->global_wrk->status_page[idx]);
	else
		return READ_ONCE(wrk->status_page[idx]);
}

static uint32_t
current_gpu_seqno(struct workload *wrk, enum intel_engine_id engine)
{
	return read_status_page(wrk, SEQNO_IDX(engine));
}

struct workload_balancer {
	unsigned int id;
	const char *name;
	const char *desc;
	unsigned int flags;
	unsigned int min_gen;

	unsigned int (*get_qd)(const struct workload_balancer *balancer,
			       struct workload *wrk,
			       enum intel_engine_id engine);
	enum intel_engine_id (*balance)(const struct workload_balancer *balancer,
					struct workload *wrk, struct w_step *w);
};

static enum intel_engine_id
rr_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	unsigned int engine;

	engine = get_vcs_engine(wrk->vcs_rr);
	wrk->vcs_rr ^= 1;

	return engine;
}

static enum intel_engine_id
rand_balance(const struct workload_balancer *balancer,
	     struct workload *wrk, struct w_step *w)
{
	return get_vcs_engine(hars_petruska_f54_1_random(&wrk->prng) & 1);
}

static unsigned int
get_qd_depth(const struct workload_balancer *balancer,
	     struct workload *wrk, enum intel_engine_id engine)
{
	return current_seqno(wrk, engine) - current_gpu_seqno(wrk, engine);
}

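/*
 * Pick the VCS engine with the smaller estimated queue depth. On a tie,
 * fall back to either random selection or round-robin; vcs_rr is set to
 * the opposite of the choice so that consecutive ties alternate engines.
 */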
static enum intel_engine_id
__qd_select_engine(struct workload *wrk, const unsigned long *qd, bool random)
{
	unsigned int n;

	if (qd[VCS1] < qd[VCS2])
		n = 0;
	else if (qd[VCS1] > qd[VCS2])
		n = 1;
	else if (random)
		n = hars_petruska_f54_1_random(&wrk->prng) & 1;
	else
		n = wrk->vcs_rr;
	wrk->vcs_rr = n ^ 1;

	return get_vcs_engine(n);
}

static enum intel_engine_id
__qd_balance(const struct workload_balancer *balancer,
	     struct workload *wrk, struct w_step *w, bool random)
{
	enum intel_engine_id engine;
	unsigned long qd[NUM_ENGINES];

	igt_assert(w->engine == VCS);

	qd[VCS1] = balancer->get_qd(balancer, wrk, VCS1);
	wrk->qd_sum[VCS1] += qd[VCS1];

	qd[VCS2] = balancer->get_qd(balancer, wrk, VCS2);
	wrk->qd_sum[VCS2] += qd[VCS2];

	engine = __qd_select_engine(wrk, qd, random);

#ifdef DEBUG
	printf("qd_balance[%u]: 1:%ld 2:%ld rr:%u = %u\t(%u - %u) (%u - %u)\n",
	       wrk->id, qd[VCS1], qd[VCS2], wrk->vcs_rr, engine,
	       current_seqno(wrk, VCS1), current_gpu_seqno(wrk, VCS1),
	       current_seqno(wrk, VCS2), current_gpu_seqno(wrk, VCS2));
#endif
	return engine;
}

static enum intel_engine_id
qd_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	return __qd_balance(balancer, wrk, w, false);
}

static enum intel_engine_id
qdr_balance(const struct workload_balancer *balancer,
	    struct workload *wrk, struct w_step *w)
{
	return __qd_balance(balancer, wrk, w, true);
}

static enum intel_engine_id
qdavg_balance(const struct workload_balancer *balancer,
	      struct workload *wrk, struct w_step *w)
{
	unsigned long qd[NUM_ENGINES];
	unsigned int engine;

	igt_assert(w->engine == VCS);

	for (engine = VCS1; engine <= VCS2; engine++) {
		qd[engine] = balancer->get_qd(balancer, wrk, engine);
		wrk->qd_sum[engine] += qd[engine];

		ewma_rt_add(&wrk->rt.avg[engine], qd[engine]);
		qd[engine] = ewma_rt_read(&wrk->rt.avg[engine]);
	}

	engine = __qd_select_engine(wrk, qd, false);
#ifdef DEBUG
	printf("qdavg_balance[%u]: 1:%ld 2:%ld rr:%u = %u\t(%u - %u) (%u - %u)\n",
	       wrk->id, qd[VCS1], qd[VCS2], wrk->vcs_rr, engine,
	       current_seqno(wrk, VCS1), current_gpu_seqno(wrk, VCS1),
	       current_seqno(wrk, VCS2), current_gpu_seqno(wrk, VCS2));
#endif
	return engine;
}

static enum intel_engine_id
__rt_select_engine(struct workload *wrk, unsigned long *qd, bool random)
{
	qd[VCS1] >>= 10;
	qd[VCS2] >>= 10;

	return __qd_select_engine(wrk, qd, random);
}

struct rt_depth {
	uint32_t seqno;
	uint32_t submitted;
	uint32_t completed;
};

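/*
 * Read a consistent snapshot of an engine's status page slot. The GPU
 * writes the latch copy of the seqno last (see terminate_bb()), so
 * re-reading until latch == seqno acts like a seqlock and discards torn
 * updates.
 */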
static void get_rt_depth(struct workload *wrk,
			 unsigned int engine,
			 struct rt_depth *rt)
{
	const unsigned int idx = SEQNO_IDX(engine);
	uint32_t latch;

	do {
		latch = read_status_page(wrk, idx + 3);
		rt->submitted = read_status_page(wrk, idx + 1);
		rt->completed = read_status_page(wrk, idx + 2);
		rt->seqno = read_status_page(wrk, idx);
	} while (latch != rt->seqno);
}

static enum intel_engine_id
__rt_balance(const struct workload_balancer *balancer,
	     struct workload *wrk, struct w_step *w, bool random)
{
	unsigned long qd[NUM_ENGINES];
	unsigned int engine;

	igt_assert(w->engine == VCS);

	/* Estimate the "speed" of the most recent batch
	 * (finish time - submit time)
	 * and use that as an approximation of the total remaining time for
	 * all batches on that engine, plus the time we expect this batch to
	 * take. We try to keep the total balanced between the engines.
	 */
	for (engine = VCS1; engine <= VCS2; engine++) {
		struct rt_depth rt;

		get_rt_depth(wrk, engine, &rt);
		qd[engine] = current_seqno(wrk, engine) - rt.seqno;
		wrk->qd_sum[engine] += qd[engine];
		qd[engine] = (qd[engine] + 1) * (rt.completed - rt.submitted);
#ifdef DEBUG
		printf("rt[%u] = %d (%d - %d) x %d (%d - %d) = %ld\n",
		       engine,
		       current_seqno(wrk, engine) - rt.seqno,
		       current_seqno(wrk, engine), rt.seqno,
		       rt.completed - rt.submitted,
		       rt.completed, rt.submitted,
		       qd[engine]);
#endif
	}

	return __rt_select_engine(wrk, qd, random);
}

static enum intel_engine_id
rt_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	return __rt_balance(balancer, wrk, w, false);
}

static enum intel_engine_id
rtr_balance(const struct workload_balancer *balancer,
	    struct workload *wrk, struct w_step *w)
{
	return __rt_balance(balancer, wrk, w, true);
}

static enum intel_engine_id
rtavg_balance(const struct workload_balancer *balancer,
	      struct workload *wrk, struct w_step *w)
{
	unsigned long qd[NUM_ENGINES];
	unsigned int engine;

	igt_assert(w->engine == VCS);

	/* Estimate the average "speed" of the most recent batches
	 * (finish time - submit time)
	 * and use that as an approximation of the total remaining time for
	 * all batches on that engine, plus the time we expect to execute in.
	 * We try to keep the total remaining balanced between the engines.
	 */
	for (engine = VCS1; engine <= VCS2; engine++) {
		struct rt_depth rt;

		get_rt_depth(wrk, engine, &rt);
		if (rt.seqno != wrk->rt.last[engine]) {
			igt_assert((long)(rt.completed - rt.submitted) > 0);
			ewma_rt_add(&wrk->rt.avg[engine],
				    rt.completed - rt.submitted);
			wrk->rt.last[engine] = rt.seqno;
		}
		qd[engine] = current_seqno(wrk, engine) - rt.seqno;
		wrk->qd_sum[engine] += qd[engine];
		qd[engine] =
			(qd[engine] + 1) * ewma_rt_read(&wrk->rt.avg[engine]);

#ifdef DEBUG
		printf("rtavg[%d] = %d (%d - %d) x %ld (%d) = %ld\n",
		       engine,
		       current_seqno(wrk, engine) - rt.seqno,
		       current_seqno(wrk, engine), rt.seqno,
		       ewma_rt_read(&wrk->rt.avg[engine]),
		       rt.completed - rt.submitted,
		       qd[engine]);
#endif
	}

	return __rt_select_engine(wrk, qd, false);
}

static enum intel_engine_id
context_balance(const struct workload_balancer *balancer,
		struct workload *wrk, struct w_step *w)
{
	return get_vcs_engine(wrk->ctx_list[w->context].static_vcs);
}

static const struct workload_balancer all_balancers[] = {
	{
		.id = 0,
		.name = "rr",
		.desc = "Simple round-robin.",
		.balance = rr_balance,
	},
	{
		.id = 6,
		.name = "rand",
		.desc = "Random selection.",
		.balance = rand_balance,
	},
	{
		.id = 1,
		.name = "qd",
		.desc = "Queue depth estimation with round-robin on equal depth.",
		.flags = SEQNO,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = qd_balance,
	},
	{
		.id = 5,
		.name = "qdr",
		.desc = "Queue depth estimation with random selection on equal depth.",
		.flags = SEQNO,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = qdr_balance,
	},
	{
		.id = 7,
		.name = "qdavg",
		.desc = "Like qd, but using an average queue depth estimator.",
		.flags = SEQNO,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = qdavg_balance,
	},
	{
		.id = 2,
		.name = "rt",
		.desc = "Queue depth plus last runtime estimation.",
		.flags = SEQNO | RT,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = rt_balance,
	},
	{
		.id = 3,
		.name = "rtr",
		.desc = "Like rt but with random engine selection on equal depth.",
		.flags = SEQNO | RT,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = rtr_balance,
	},
	{
		.id = 4,
		.name = "rtavg",
		.desc = "Improved version of rt, tracking average execution speed per engine.",
		.flags = SEQNO | RT,
		.min_gen = 8,
		.get_qd = get_qd_depth,
		.balance = rtavg_balance,
	},
	{
		.id = 8,
		.name = "context",
		.desc = "Static round-robin VCS assignment at context creation.",
		.balance = context_balance,
	},
};

static unsigned int
global_get_qd(const struct workload_balancer *balancer,
	      struct workload *wrk, enum intel_engine_id engine)
{
	igt_assert(wrk->global_wrk);
	igt_assert(wrk->global_balancer);

	return wrk->global_balancer->get_qd(wrk->global_balancer,
					    wrk->global_wrk, engine);
}

static enum intel_engine_id
global_balance(const struct workload_balancer *balancer,
	       struct workload *wrk, struct w_step *w)
{
	enum intel_engine_id engine;
	int ret;

	igt_assert(wrk->global_wrk);
	igt_assert(wrk->global_balancer);

	wrk = wrk->global_wrk;

	ret = pthread_mutex_lock(&wrk->mutex);
	igt_assert(ret == 0);

	engine = wrk->global_balancer->balance(wrk->global_balancer, wrk, w);

	ret = pthread_mutex_unlock(&wrk->mutex);
	igt_assert(ret == 0);

	return engine;
}

static const struct workload_balancer global_balancer = {
	.id = ~0,
	.name = "global",
	.desc = "Global balancer",
	.get_qd = global_get_qd,
	.balance = global_balance,
};

static void
update_bb_seqno(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
{
	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	w->reloc[0].delta = SEQNO_OFFSET(engine);

	*w->seqno_value = seqno;
	*w->seqno_address = w->reloc[0].presumed_offset + w->reloc[0].delta;

	/* If not using NO_RELOC, force the relocations */
	if (!(w->eb.flags & I915_EXEC_NO_RELOC))
		w->reloc[0].presumed_offset = -1;
}

static void
update_bb_rt(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
{
	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	w->reloc[1].delta = SEQNO_OFFSET(engine) + sizeof(uint32_t);
	w->reloc[2].delta = SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t);
	w->reloc[3].delta = SEQNO_OFFSET(engine) + 3 * sizeof(uint32_t);

	*w->latch_value = seqno;
	*w->latch_address = w->reloc[3].presumed_offset + w->reloc[3].delta;

	*w->rt0_value = *REG(RCS_TIMESTAMP);
	*w->rt0_address = w->reloc[1].presumed_offset + w->reloc[1].delta;
	*w->rt1_address = w->reloc[2].presumed_offset + w->reloc[2].delta;

	/* If not using NO_RELOC, force the relocations */
	if (!(w->eb.flags & I915_EXEC_NO_RELOC)) {
		w->reloc[1].presumed_offset = -1;
		w->reloc[2].presumed_offset = -1;
		w->reloc[3].presumed_offset = -1;
	}
}

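/*
 * Wait for a relative step to complete, walking backwards to the nearest
 * BATCH step since only batches have a buffer object to sync against.
 */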
static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
{
	if (target < 0)
		target = wrk->nr_steps + target;

	igt_assert(target < wrk->nr_steps);

	while (wrk->steps[target].type != BATCH) {
		if (--target < 0)
			target = wrk->nr_steps + target;
	}

	igt_assert(target < wrk->nr_steps);
	igt_assert(wrk->steps[target].type == BATCH);

	gem_sync(fd, wrk->steps[target].obj[0].handle);
}

static uint32_t *get_status_cs(struct workload *wrk)
{
	return wrk->status_cs;
}

Chris Wilsondd7bc6d2017-05-10 11:29:09 +01001426#define INIT_CLOCKS 0x1
1427#define INIT_ALL (INIT_CLOCKS)
1428static void init_status_page(struct workload *wrk, unsigned int flags)
Chris Wilson7d1362a2017-05-09 13:41:01 +01001429{
Chris Wilson02b0f8c2017-05-09 21:26:46 +01001430 struct drm_i915_gem_relocation_entry reloc[4] = {};
Tvrtko Ursulin01959de2017-05-19 15:42:35 +01001431 struct drm_i915_gem_exec_object2 *status_object =
1432 get_status_objects(wrk);
Chris Wilson7d1362a2017-05-09 13:41:01 +01001433 struct drm_i915_gem_execbuffer2 eb = {
Chris Wilson36dec3d2017-05-10 10:57:16 +01001434 .buffer_count = ARRAY_SIZE(wrk->status_object),
Tvrtko Ursulin01959de2017-05-19 15:42:35 +01001435 .buffers_ptr = to_user_pointer(status_object)
Chris Wilson7d1362a2017-05-09 13:41:01 +01001436 };
Tvrtko Ursulin01959de2017-05-19 15:42:35 +01001437 uint32_t *base = get_status_cs(wrk);
Chris Wilson7d1362a2017-05-09 13:41:01 +01001438
1439 /* Want to make sure that the balancer has a reasonable view of
1440 * the background busyness of each engine. To do that we occasionally
1441 * send a dummy batch down the pipeline.
1442 */
1443
Tvrtko Ursulin01959de2017-05-19 15:42:35 +01001444 if (!base)
Chris Wilson7d1362a2017-05-09 13:41:01 +01001445 return;
1446
Tvrtko Ursulin01959de2017-05-19 15:42:35 +01001447 gem_set_domain(fd, status_object[1].handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	status_object[1].relocs_ptr = to_user_pointer(reloc);
	status_object[1].relocation_count = 2;
	if (flags & INIT_CLOCKS)
		status_object[1].relocation_count += 2;

	for (int engine = 0; engine < NUM_ENGINES; engine++) {
		struct drm_i915_gem_relocation_entry *r = reloc;
		uint64_t presumed_offset = status_object[0].offset;
		uint32_t offset = engine * 128;
		uint32_t *cs = base + offset / sizeof(*cs);
		uint64_t addr;

		r->offset = offset + sizeof(uint32_t);
		r->delta = SEQNO_OFFSET(engine);
		r->presumed_offset = presumed_offset;
		addr = presumed_offset + r->delta;
		r++;
		*cs++ = MI_STORE_DWORD_IMM;
		*cs++ = addr;
		*cs++ = addr >> 32;
		*cs++ = new_seqno(wrk, engine);
		offset += 4 * sizeof(uint32_t);

		/* When we are busy, we can just reuse the last set of timings.
		 * If we have been idle for a while, we want to resample the
		 * latency on each engine (to measure external load).
		 */
		if (flags & INIT_CLOCKS) {
			r->offset = offset + sizeof(uint32_t);
			r->delta = SEQNO_OFFSET(engine) + sizeof(uint32_t);
			r->presumed_offset = presumed_offset;
			addr = presumed_offset + r->delta;
			r++;
			*cs++ = MI_STORE_DWORD_IMM;
			*cs++ = addr;
			*cs++ = addr >> 32;
			*cs++ = *REG(RCS_TIMESTAMP);
			offset += 4 * sizeof(uint32_t);

			r->offset = offset + 2 * sizeof(uint32_t);
			r->delta = SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t);
			r->presumed_offset = presumed_offset;
			addr = presumed_offset + r->delta;
			r++;
			*cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
			*cs++ = RCS_TIMESTAMP;
			*cs++ = addr;
			*cs++ = addr >> 32;
			offset += 4 * sizeof(uint32_t);
		}

		r->offset = offset + sizeof(uint32_t);
		r->delta = SEQNO_OFFSET(engine) + 3 * sizeof(uint32_t);
		r->presumed_offset = presumed_offset;
		addr = presumed_offset + r->delta;
		r++;
		*cs++ = MI_STORE_DWORD_IMM;
		*cs++ = addr;
		*cs++ = addr >> 32;
		*cs++ = current_seqno(wrk, engine);
		offset += 4 * sizeof(uint32_t);

		*cs++ = MI_BATCH_BUFFER_END;

		eb_set_engine(&eb, engine, wrk->flags);
		eb.flags |= I915_EXEC_HANDLE_LUT;
		eb.flags |= I915_EXEC_NO_RELOC;

		eb.batch_start_offset = 128 * engine;

		gem_execbuf(fd, &eb);
	}
}

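/*
 * Submit one batch step: refresh the execbuf flags for the chosen engine,
 * write the step seqno (and, for RT mode, timestamps) into the batch via
 * update_bb_seqno()/update_bb_rt(), honour at most one input fence
 * dependency and, when an output fence was requested, store the fence fd
 * returned by the kernel in w->emit_fence.
 */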
static void
do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
      unsigned int flags)
{
	uint32_t seqno = new_seqno(wrk, engine);
	unsigned int i;

	eb_update_flags(w, engine, flags);

	if (flags & SEQNO)
		update_bb_seqno(w, engine, seqno);
	if (flags & RT)
		update_bb_rt(w, engine, seqno);

	w->eb.batch_start_offset =
		ALIGN(w->bb_sz - get_bb_sz(get_duration(w)),
		      2 * sizeof(uint32_t));

	for (i = 0; i < w->fence_deps.nr; i++) {
		int tgt = w->idx + w->fence_deps.list[i];

		/* TODO: fence merging needed to support multiple inputs */
		igt_assert(i == 0);
		igt_assert(tgt >= 0 && tgt < w->idx);
		igt_assert(wrk->steps[tgt].emit_fence > 0);

		w->eb.flags |= LOCAL_I915_EXEC_FENCE_IN;
		w->eb.rsvd2 = wrk->steps[tgt].emit_fence;
	}

	if (w->eb.flags & LOCAL_I915_EXEC_FENCE_OUT)
		gem_execbuf_wr(fd, &w->eb);
	else
		gem_execbuf(fd, &w->eb);

	if (w->eb.flags & LOCAL_I915_EXEC_FENCE_OUT) {
		w->emit_fence = w->eb.rsvd2 >> 32;
		igt_assert(w->emit_fence > 0);
	}
}

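/*
 * Wait on each of the step's backward data dependencies and report whether
 * any wait was actually performed.
 */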
static bool sync_deps(struct workload *wrk, struct w_step *w)
{
	bool synced = false;
	unsigned int i;

	for (i = 0; i < w->data_deps.nr; i++) {
		int dep_idx;

		igt_assert(w->data_deps.list[i] <= 0);

		if (!w->data_deps.list[i])
			continue;

		dep_idx = w->idx + w->data_deps.list[i];

		igt_assert(dep_idx >= 0 && dep_idx < w->idx);
		igt_assert(wrk->steps[dep_idx].type == BATCH);

		gem_sync(fd, wrk->steps[dep_idx].obj[0].handle);

		synced = true;
	}

	return synced;
}

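/*
 * Per-client thread entry point. Interprets the step list once per repeat:
 * sleeps for DELAY/PERIOD steps, waits for SYNC steps, records throttle
 * depths, creates and signals software fences, and submits BATCH steps via
 * do_eb(), optionally routing VCS batches through the load balancer.
 */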
static void *run_workload(void *data)
{
	struct workload *wrk = (struct workload *)data;
	struct timespec t_start, t_end;
	struct w_step *w;
	bool last_sync = false;
	int throttle = -1;
	int qd_throttle = -1;
	int count;
	int i;

	clock_gettime(CLOCK_MONOTONIC, &t_start);

	hars_petruska_f54_1_random_seed((wrk->flags & SYNCEDCLIENTS) ?
					0 : wrk->id);

	init_status_page(wrk, INIT_ALL);
	for (count = 0; wrk->run && (wrk->background || count < wrk->repeat);
	     count++) {
		unsigned int cur_seqno = wrk->sync_seqno;

		clock_gettime(CLOCK_MONOTONIC, &wrk->repeat_start);

		for (i = 0, w = wrk->steps; wrk->run && (i < wrk->nr_steps);
		     i++, w++) {
			enum intel_engine_id engine = w->engine;
			int do_sleep = 0;

			if (w->type == DELAY) {
				do_sleep = w->delay;
			} else if (w->type == PERIOD) {
				struct timespec now;

				clock_gettime(CLOCK_MONOTONIC, &now);
				do_sleep = w->period -
					   elapsed_us(&wrk->repeat_start, &now);
				if (do_sleep < 0) {
					if (verbose > 1)
						printf("%u: Dropped period @ %u/%u (%dus late)!\n",
						       wrk->id, count, i, do_sleep);
					continue;
				}
			} else if (w->type == SYNC) {
				/* Signed so the range assert is meaningful. */
				int s_idx = i + w->target;

				igt_assert(s_idx >= 0 && s_idx < i);
				igt_assert(wrk->steps[s_idx].type == BATCH);
				gem_sync(fd, wrk->steps[s_idx].obj[0].handle);
				continue;
			} else if (w->type == THROTTLE) {
				throttle = w->throttle;
				continue;
			} else if (w->type == QD_THROTTLE) {
				qd_throttle = w->throttle;
				continue;
			} else if (w->type == SW_FENCE) {
				igt_assert(w->emit_fence < 0);
				w->emit_fence =
					sw_sync_timeline_create_fence(wrk->sync_timeline,
								      cur_seqno + w->idx);
				igt_assert(w->emit_fence > 0);
				continue;
			} else if (w->type == SW_FENCE_SIGNAL) {
				int tgt = w->idx + w->target;
				int inc;

				igt_assert(tgt >= 0 && tgt < i);
				igt_assert(wrk->steps[tgt].type == SW_FENCE);
				cur_seqno += wrk->steps[tgt].idx;
				inc = cur_seqno - wrk->sync_seqno;
				sw_sync_timeline_inc(wrk->sync_timeline, inc);
				continue;
			}

			if (do_sleep || w->type == PERIOD) {
				usleep(do_sleep);
				continue;
			}

			igt_assert(w->type == BATCH);

			if ((wrk->flags & DEPSYNC) && engine == VCS)
				last_sync = sync_deps(wrk, w);

			if (last_sync && (wrk->flags & HEARTBEAT))
				init_status_page(wrk, 0);

			last_sync = false;

			wrk->nr_bb[engine]++;
			if (engine == VCS && wrk->balancer) {
				engine = wrk->balancer->balance(wrk->balancer,
								wrk, w);
				wrk->nr_bb[engine]++;
			}

			if (throttle > 0)
				w_sync_to(wrk, w, i - throttle);

			do_eb(wrk, w, engine, wrk->flags);

			if (w->request != -1) {
				igt_list_del(&w->rq_link);
				wrk->nrequest[w->request]--;
			}
			w->request = engine;
			igt_list_add_tail(&w->rq_link, &wrk->requests[engine]);
			wrk->nrequest[engine]++;

			if (!wrk->run)
				break;

			if (w->sync) {
				gem_sync(fd, w->obj[0].handle);
				last_sync = true;
			}

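			/*
			 * Queue-depth throttling: wait on the oldest
			 * outstanding request(s) until at most qd_throttle
			 * batches remain in flight on this engine.
			 */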
			if (qd_throttle > 0) {
				while (wrk->nrequest[engine] > qd_throttle) {
					struct w_step *s;

					s = igt_list_first_entry(&wrk->requests[engine],
								 s, rq_link);

					gem_sync(fd, s->obj[0].handle);
					last_sync = true;

					s->request = -1;
					igt_list_del(&s->rq_link);
					wrk->nrequest[engine]--;
				}
			}
		}

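		/*
		 * Advance the sw_sync timeline enough to signal all fences
		 * emitted but not explicitly signalled in this iteration.
		 */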
		if (wrk->sync_timeline) {
			int inc;

			inc = wrk->nr_steps - (cur_seqno - wrk->sync_seqno);
			sw_sync_timeline_inc(wrk->sync_timeline, inc);
			wrk->sync_seqno += wrk->nr_steps;
		}

		/* Cleanup all fences instantiated in this iteration. */
		for (i = 0, w = wrk->steps; wrk->run && (i < wrk->nr_steps);
		     i++, w++) {
			if (w->emit_fence > 0) {
				close(w->emit_fence);
				w->emit_fence = -1;
			}
		}
	}

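	/*
	 * Drain: wait for the last outstanding request on each engine so no
	 * work is left running when the elapsed time is sampled.
	 */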
	for (i = 0; i < NUM_ENGINES; i++) {
		if (!wrk->nrequest[i])
			continue;

		w = igt_list_last_entry(&wrk->requests[i], w, rq_link);
		gem_sync(fd, w->obj[0].handle);
	}

	clock_gettime(CLOCK_MONOTONIC, &t_end);

	if (wrk->print_stats) {
		double t = elapsed(&t_start, &t_end);

		printf("%c%u: %.3fs elapsed (%d cycles, %.3f workloads/s).",
		       wrk->background ? ' ' : '*', wrk->id,
		       t, count, count / t);
		if (wrk->balancer)
			printf(" %lu (%lu + %lu) total VCS batches.",
			       wrk->nr_bb[VCS], wrk->nr_bb[VCS1], wrk->nr_bb[VCS2]);
		if (wrk->balancer && wrk->balancer->get_qd)
			printf(" Average queue depths %.3f, %.3f.",
			       (double)wrk->qd_sum[VCS1] / wrk->nr_bb[VCS],
			       (double)wrk->qd_sum[VCS2] / wrk->nr_bb[VCS]);
		putchar('\n');
	}

	return NULL;
}

static void fini_workload(struct workload *wrk)
{
	free(wrk->steps);
	free(wrk);
}

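/*
 * Find the size of a nop batch which executes in roughly nop_calibration_us
 * microseconds: time a candidate batch and rescale it until the size
 * converges within the requested tolerance. Returns the size in dwords.
 */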
static unsigned long calibrate_nop(unsigned int tolerance_pct)
{
	const uint32_t bbe = 0xa << 23;
	unsigned int loops = 17;
	unsigned int usecs = nop_calibration_us;
	struct drm_i915_gem_exec_object2 obj = {};
	struct drm_i915_gem_execbuffer2 eb =
		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj };
	long size, last_size;
	struct timespec t_0, t_end;

	clock_gettime(CLOCK_MONOTONIC, &t_0);

	size = 256 * 1024;
	do {
		struct timespec t_start;

		obj.handle = gem_create(fd, size);
		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
			  sizeof(bbe));
		gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);

		clock_gettime(CLOCK_MONOTONIC, &t_start);
		for (int loop = 0; loop < loops; loop++)
			gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);
		clock_gettime(CLOCK_MONOTONIC, &t_end);

		gem_close(fd, obj.handle);

		last_size = size;
		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
		size = ALIGN(size, sizeof(uint32_t));
	} while (elapsed(&t_0, &t_end) < 5 ||
		 labs(size - last_size) > (size * tolerance_pct / 100));

	return size / sizeof(uint32_t);
}

static void print_help(void)
{
	unsigned int i;

	puts(
"Usage: gem_wsim [OPTIONS]\n"
"\n"
"Runs a simulated workload on the GPU.\n"
"When run without arguments it performs a GPU calibration, the result of\n"
"which needs to be provided when running the simulation in subsequent\n"
"invocations.\n"
"\n"
"Options:\n"
"  -h              This text.\n"
"  -q              Be quiet - do not output anything to stdout.\n"
"  -n <n>          Nop calibration value.\n"
"  -t <n>          Nop calibration tolerance percentage.\n"
"                  Use when it is difficult to obtain a calibration with the\n"
"                  default settings.\n"
"  -p <n>          Context priority to use for the following workload on the\n"
"                  command line.\n"
"  -w <desc|path>  Filename or a workload descriptor.\n"
"                  Can be given multiple times.\n"
"  -W <desc|path>  Filename or a master workload descriptor.\n"
"                  Only one master workload can be optionally specified in\n"
"                  which case all other workloads become background ones and\n"
"                  run as long as the master.\n"
"  -a <desc|path>  Append a workload to all other workloads.\n"
"  -r <n>          How many times to emit the workload.\n"
"  -c <n>          Fork N clients emitting the workload simultaneously.\n"
"  -x              Swap VCS1 and VCS2 engines in every other client.\n"
"  -b <n>          Load balancing to use.\n"
"                  Available load balancers are:"
	);

	for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
		igt_assert(all_balancers[i].desc);
		printf(
"                  %s (%u): %s\n",
		       all_balancers[i].name, all_balancers[i].id,
		       all_balancers[i].desc);
	}
	puts(
"                  Balancers can be specified either as names or as their id\n"
"                  number as listed above.\n"
"  -2              Remap VCS2 to BCS.\n"
"  -R              Round-robin initial VCS assignment per client.\n"
"  -H              Send heartbeat on synchronisation points with seqno based\n"
"                  balancers. Gives better engine busyness view in some cases.\n"
"  -S              Synchronize the sequence of random batch durations between\n"
"                  clients.\n"
"  -G              Global load balancing - a single load balancer will be\n"
"                  shared between all clients and there will be a single\n"
"                  seqno domain.\n"
"  -d              Sync between data dependencies in userspace."
	);
}

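/*
 * If the argument names a regular file, read it and flatten it into a single
 * comma-separated descriptor (newlines become commas, trailing separators are
 * trimmed); otherwise assume the argument already is a descriptor and return
 * it unchanged.
 */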
static char *load_workload_descriptor(char *filename)
{
	struct stat sbuf;
	char *buf;
	int infd, ret, i;
	ssize_t len;

	ret = stat(filename, &sbuf);
	if (ret || !S_ISREG(sbuf.st_mode))
		return filename;

	igt_assert(sbuf.st_size < 1024 * 1024); /* Just so. */
	buf = malloc(sbuf.st_size + 1); /* +1 so the string is always terminated */
	igt_assert(buf);

	infd = open(filename, O_RDONLY);
	igt_assert(infd >= 0);
	len = read(infd, buf, sbuf.st_size);
	igt_assert(len == sbuf.st_size);
	close(infd);
	buf[len] = 0;

	for (i = 0; i < len; i++) {
		if (buf[i] == '\n')
			buf[i] = ',';
	}

	len--;
	while (buf[len] == ',')
		buf[len--] = 0;

	return buf;
}

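/* Grow the argument array by one and append a workload with its priority. */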
static struct w_arg *
add_workload_arg(struct w_arg *w_args, unsigned int nr_args, char *w_arg, int prio)
{
	w_args = realloc(w_args, sizeof(*w_args) * nr_args);
	igt_assert(w_args);
	w_args[nr_args - 1] = (struct w_arg) { w_arg, NULL, prio };

	return w_args;
}

static int find_balancer_by_name(char *name)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
		if (!strcasecmp(name, all_balancers[i].name))
			return all_balancers[i].id;
	}

	return -1;
}

static const struct workload_balancer *find_balancer_by_id(unsigned int id)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
		if (id == all_balancers[i].id)
			return &all_balancers[i];
	}

	return NULL;
}

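/*
 * Map the register space and, in verbose mode, estimate the register read
 * and clock_gettime() overheads so a rough cycles-per-microsecond figure for
 * the RCS timestamp can be reported.
 */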
static void init_clocks(void)
{
	struct timespec t_start, t_end;
	uint32_t rcs_start, rcs_end;
	double overhead, t;

	intel_register_access_init(intel_get_pci_device(), false, fd);

	if (verbose <= 1)
		return;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	for (int i = 0; i < 100; i++)
		rcs_start = *REG(RCS_TIMESTAMP);
	clock_gettime(CLOCK_MONOTONIC, &t_end);
	overhead = 2 * elapsed(&t_start, &t_end) / 100;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	for (int i = 0; i < 100; i++)
		clock_gettime(CLOCK_MONOTONIC, &t_end);
	clock_gettime(CLOCK_MONOTONIC, &t_end);
	overhead += elapsed(&t_start, &t_end) / 100;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	rcs_start = *REG(RCS_TIMESTAMP);
	usleep(100);
	rcs_end = *REG(RCS_TIMESTAMP);
	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = elapsed(&t_start, &t_end) - overhead;
	printf("%u cycles in %.1fus, i.e. 1024 cycles takes %.1fus\n",
	       rcs_end - rcs_start, 1e6 * t, 1024e6 * t / (rcs_end - rcs_start));
}

int main(int argc, char **argv)
{
	unsigned int repeat = 1;
	unsigned int clients = 1;
	unsigned int flags = 0;
	struct timespec t_start, t_end;
	struct workload **w, **wrk = NULL;
	struct workload *app_w = NULL;
	unsigned int nr_w_args = 0;
	int master_workload = -1;
	char *append_workload_arg = NULL;
	struct w_arg *w_args = NULL;
	unsigned int tolerance_pct = 1;
	const struct workload_balancer *balancer = NULL;
	char *endptr = NULL;
	int prio = 0;
	double t;
	int i, c;

	/*
	 * Open the device via the low-level API so we can do the GPU quiesce
	 * manually as close as possible in time to the start of the workload.
	 * This minimizes the gap in engine utilization tracking when observed
	 * via external tools like trace.pl.
	 */
	fd = __drm_open_driver(DRIVER_INTEL);
	igt_require(fd >= 0); /* require a valid fd, not merely a non-zero one */

	init_clocks();

	while ((c = getopt(argc, argv, "hqv2RSHxGdc:n:r:w:W:a:t:b:p:")) != -1) {
		switch (c) {
		case 'W':
			if (master_workload >= 0) {
				if (verbose)
					fprintf(stderr,
						"Only one master workload can be given!\n");
				return 1;
			}
			master_workload = nr_w_args;
			/* Fall through */
		case 'w':
			w_args = add_workload_arg(w_args, ++nr_w_args, optarg, prio);
			break;
		case 'p':
			prio = atoi(optarg);
			break;
		case 'a':
			if (append_workload_arg) {
				if (verbose)
					fprintf(stderr,
						"Only one append workload can be given!\n");
				return 1;
			}
			append_workload_arg = optarg;
			break;
		case 'c':
			clients = strtol(optarg, NULL, 0);
			break;
		case 't':
			tolerance_pct = strtol(optarg, NULL, 0);
			break;
		case 'n':
			nop_calibration = strtol(optarg, NULL, 0);
			break;
		case 'r':
			repeat = strtol(optarg, NULL, 0);
			break;
		case 'q':
			verbose = 0;
			break;
		case 'v':
			verbose++;
			break;
		case 'x':
			flags |= SWAPVCS;
			break;
		case '2':
			flags |= VCS2REMAP;
			break;
		case 'R':
			flags |= INITVCSRR;
			break;
		case 'S':
			flags |= SYNCEDCLIENTS;
			break;
		case 'H':
			flags |= HEARTBEAT;
			break;
		case 'G':
			flags |= GLOBAL_BALANCE;
			break;
		case 'd':
			flags |= DEPSYNC;
			break;
		case 'b':
			i = find_balancer_by_name(optarg);
			if (i < 0) {
				i = strtol(optarg, &endptr, 0);
				if (endptr && *endptr)
					i = -1;
			}

			if (i >= 0) {
				balancer = find_balancer_by_id(i);
				if (balancer) {
					igt_assert(intel_gen(intel_get_drm_devid(fd)) >= balancer->min_gen);
					flags |= BALANCE | balancer->flags;
				}
			}

			if (!balancer) {
				if (verbose)
					fprintf(stderr,
						"Unknown balancing mode '%s'!\n",
						optarg);
				return 1;
			}
			break;
		case 'h':
			print_help();
			return 0;
		default:
			return 1;
		}
	}

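	/*
	 * Validate option combinations and, if no calibration value was
	 * given, perform the calibration and exit.
	 */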
	if ((flags & HEARTBEAT) && !(flags & SEQNO)) {
		if (verbose)
			fprintf(stderr, "Heartbeat needs a seqno based balancer!\n");
		return 1;
	}

	if (!nop_calibration) {
		if (verbose > 1)
			printf("Calibrating nop delay with %u%% tolerance...\n",
			       tolerance_pct);
		nop_calibration = calibrate_nop(tolerance_pct);
		if (verbose)
			printf("Nop calibration for %uus delay is %lu.\n",
			       nop_calibration_us, nop_calibration);

		return 0;
	}

	if (!nr_w_args) {
		if (verbose)
			fprintf(stderr, "No workload descriptor(s)!\n");
		return 1;
	}

	if (nr_w_args > 1 && clients > 1) {
		if (verbose)
			fprintf(stderr,
				"Cloned clients cannot be combined with multiple workloads!\n");
		return 1;
	}

	if ((flags & GLOBAL_BALANCE) && !balancer) {
		if (verbose)
			fprintf(stderr,
				"Balancer not specified in global balancing mode!\n");
		return 1;
	}

	if (append_workload_arg) {
		append_workload_arg = load_workload_descriptor(append_workload_arg);
		if (!append_workload_arg) {
			if (verbose)
				fprintf(stderr,
					"Failed to load append workload descriptor!\n");
			return 1;
		}
	}

	if (append_workload_arg) {
		struct w_arg arg = { NULL, append_workload_arg, 0 };
		app_w = parse_workload(&arg, flags, NULL);
		if (!app_w) {
			if (verbose)
				fprintf(stderr,
					"Failed to parse append workload!\n");
			return 1;
		}
	}

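	/* Load and parse a workload descriptor for each -w/-W argument. */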
	wrk = calloc(nr_w_args, sizeof(*wrk));
	igt_assert(wrk);

	for (i = 0; i < nr_w_args; i++) {
		w_args[i].desc = load_workload_descriptor(w_args[i].filename);

		if (!w_args[i].desc) {
			if (verbose)
				fprintf(stderr,
					"Failed to load workload descriptor %u!\n",
					i);
			return 1;
		}

		wrk[i] = parse_workload(&w_args[i], flags, app_w);
		if (!wrk[i]) {
			if (verbose)
				fprintf(stderr,
					"Failed to parse workload %u!\n", i);
			return 1;
		}
	}

	if (nr_w_args > 1)
		clients = nr_w_args;

	if (verbose > 1) {
		printf("Using %lu nop calibration for %uus delay.\n",
		       nop_calibration, nop_calibration_us);
		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
		if (flags & SWAPVCS)
			printf("Swapping VCS rings between clients.\n");
		if (flags & GLOBAL_BALANCE)
			printf("Using %s balancer in global mode.\n",
			       balancer->name);
		else if (balancer)
			printf("Using %s balancer.\n", balancer->name);
	}

	if (master_workload >= 0 && clients == 1)
		master_workload = -1;

	w = calloc(clients, sizeof(struct workload *));
	igt_assert(w);

	for (i = 0; i < clients; i++) {
		unsigned int flags_ = flags;

		w[i] = clone_workload(wrk[nr_w_args > 1 ? i : 0]);

		if (flags & SWAPVCS && i & 1)
			flags_ &= ~SWAPVCS;

		if (flags & GLOBAL_BALANCE) {
			w[i]->balancer = &global_balancer;
			w[i]->global_wrk = w[0];
			w[i]->global_balancer = balancer;
		} else {
			w[i]->balancer = balancer;
		}

		w[i]->flags = flags;
		w[i]->repeat = repeat;
		w[i]->background = master_workload >= 0 && i != master_workload;
		w[i]->print_stats = verbose > 1 ||
				    (verbose > 0 && master_workload == i);

		prepare_workload(i, w[i], flags_);
	}

	gem_quiescent_gpu(fd);

	clock_gettime(CLOCK_MONOTONIC, &t_start);

	for (i = 0; i < clients; i++) {
		int ret;

		ret = pthread_create(&w[i]->thread, NULL, run_workload, w[i]);
		igt_assert_eq(ret, 0);
	}

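	/*
	 * If a master workload was nominated, wait for it to finish first and
	 * then ask all background clients to stop before joining them.
	 */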
	if (master_workload >= 0) {
		int ret = pthread_join(w[master_workload]->thread, NULL);

		igt_assert(ret == 0);

		for (i = 0; i < clients; i++)
			w[i]->run = false;
	}

	for (i = 0; i < clients; i++) {
		if (master_workload != i) {
			int ret = pthread_join(w[i]->thread, NULL);
			igt_assert(ret == 0);
		}
	}

	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = elapsed(&t_start, &t_end);
	if (verbose)
		printf("%.3fs elapsed (%.3f workloads/s)\n",
		       t, clients * repeat / t);

	for (i = 0; i < clients; i++)
		fini_workload(w[i]);
	free(w);
	for (i = 0; i < nr_w_args; i++)
		fini_workload(wrk[i]);
	free(w_args);

	return 0;
}