/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <time.h>
#include <assert.h>
#include <limits.h>

#include "intel_chipset.h"
#include "drm.h"
#include "ioctl_wrappers.h"
#include "drmtest.h"

#include "intel_io.h"
#include "igt_aux.h"
#include "igt_rand.h"

#include "ewma.h"

enum intel_engine_id {
	RCS,
	BCS,
	VCS,
	VCS1,
	VCS2,
	VECS,
	NUM_ENGINES
};

struct duration {
	unsigned int min, max;
};

enum w_type
{
	BATCH,
	SYNC,
	DELAY,
	PERIOD,
	THROTTLE,
	QD_THROTTLE
};

struct w_step
{
	/* Workload step metadata */
	enum w_type type;
	unsigned int context;
	unsigned int engine;
	struct duration duration;
	int nr_deps;
	int *dep;
	int wait;

	/* Implementation details */
	unsigned int idx;
	struct igt_list rq_link;

	struct drm_i915_gem_execbuffer2 eb;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_relocation_entry reloc[4];
	unsigned long bb_sz;
	uint32_t bb_handle;
	uint32_t *mapped_batch;
	uint32_t *seqno_value;
	uint32_t *seqno_address;
	uint32_t *rt0_value;
	uint32_t *rt0_address;
	uint32_t *rt1_address;
	uint32_t *latch_value;
	uint32_t *latch_address;
	unsigned int mapped_len;
};

DECLARE_EWMA(uint64_t, rt, 4, 2)

struct workload
{
	unsigned int nr_steps;
	struct w_step *steps;

	uint32_t prng;

	struct timespec repeat_start;

	int pipe[2];

	unsigned int nr_ctxs;
	uint32_t *ctx_id;

	uint32_t seqno[NUM_ENGINES];
	uint32_t status_page_handle;
	uint32_t *status_page;
	unsigned int vcs_rr;

	unsigned long qd_sum[NUM_ENGINES];
	unsigned long nr_bb[NUM_ENGINES];

	struct igt_list requests[NUM_ENGINES];
	unsigned int nrequest[NUM_ENGINES];

	union {
		struct rtavg {
			struct ewma_rt avg[NUM_ENGINES];
			uint32_t last[NUM_ENGINES];
		} rt;
	};
};

static const unsigned int nop_calibration_us = 1000;
static unsigned long nop_calibration;

static int verbose = 1;
static int fd;

#define SWAPVCS		(1<<0)
#define SEQNO		(1<<1)
#define BALANCE		(1<<2)
#define RT		(1<<3)
#define VCS2REMAP	(1<<4)
#define INITVCSRR	(1<<5)
#define SYNCEDCLIENTS	(1<<6)

#define VCS_SEQNO_IDX(engine) (((engine) - VCS1) * 16)
#define VCS_SEQNO_OFFSET(engine) (VCS_SEQNO_IDX(engine) * sizeof(uint32_t))

#define RCS_TIMESTAMP (0x2000 + 0x358)
#define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x)

static const char *ring_str_map[NUM_ENGINES] = {
	[RCS] = "RCS",
	[BCS] = "BCS",
	[VCS] = "VCS",
	[VCS1] = "VCS1",
	[VCS2] = "VCS2",
	[VECS] = "VECS",
};

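/*
 * Parse a '/'-separated list of batch dependencies. Each entry is a negative,
 * relative step offset (e.g. -1 is the previous step); zero entries are
 * ignored, while positive or out-of-range entries fail the parse.
 */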
static int
parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
{
	char *desc = strdup(_desc);
	char *token, *tctx = NULL, *tstart = desc;
	int dep;

	igt_assert(desc);

	w->nr_deps = 0;
	w->dep = NULL;

	while ((token = strtok_r(tstart, "/", &tctx)) != NULL) {
		tstart = NULL;

		dep = atoi(token);
		if (dep > 0 || ((int)nr_steps + dep) < 0) {
			if (w->dep)
				free(w->dep);
			return -1;
		}

		if (dep < 0) {
			w->nr_deps++;
			w->dep = realloc(w->dep, sizeof(*w->dep) * w->nr_deps);
			igt_assert(w->dep);
			w->dep[w->nr_deps - 1] = dep;
		}
	}

	free(desc);

	return 0;
}

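/*
 * A workload is a comma-separated list of steps. Special steps take two
 * fields:
 *
 *   d.<usecs> - delay for the given time
 *   p.<usecs> - sleep until the given time has passed in this iteration
 *   s.<step>  - sync on an earlier batch step (negative, relative index)
 *   t.<n>     - sync against the batch submitted n steps ago (0 disables)
 *   q.<n>     - limit per-engine queue depth to n outstanding batches
 *
 * All other steps are batches, described by five dot-separated fields:
 *
 *   <ctx>.<engine>.<min[-max] duration us>.<dependencies>.<wait 0|1>
 */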
static struct workload *parse_workload(char *_desc, unsigned int flags)
{
	struct workload *wrk;
	unsigned int nr_steps = 0;
	char *desc = strdup(_desc);
	char *_token, *token, *tctx = NULL, *tstart = desc;
	char *field, *fctx = NULL, *fstart;
	struct w_step step, *steps = NULL;
	bool bcs_used = false;
	unsigned int valid;
	int tmp;

	igt_assert(desc);

	while ((_token = strtok_r(tstart, ",", &tctx)) != NULL) {
		tstart = NULL;
		token = strdup(_token);
		igt_assert(token);
		fstart = token;
		valid = 0;
		memset(&step, 0, sizeof(step));

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			if (!strcasecmp(field, "d")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp <= 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid delay at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = DELAY;
					step.wait = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "p")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp <= 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid period at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = PERIOD;
					step.wait = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "s")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp >= 0 ||
					    ((int)nr_steps + tmp) < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid sync target at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = SYNC;
					step.wait = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "t")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid throttle at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = THROTTLE;
					step.wait = tmp;
					goto add_step;
				}
			} else if (!strcasecmp(field, "q")) {
				if ((field = strtok_r(fstart, ".", &fctx)) !=
				    NULL) {
					tmp = atoi(field);
					if (tmp < 0) {
						if (verbose)
							fprintf(stderr,
								"Invalid qd throttle at step %u!\n",
								nr_steps);
						return NULL;
					}

					step.type = QD_THROTTLE;
					step.wait = tmp;
					goto add_step;
				}
			}

			tmp = atoi(field);
			if (tmp < 0) {
				if (verbose)
					fprintf(stderr,
						"Invalid ctx id at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.context = tmp;

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			unsigned int i, old_valid = valid;

			fstart = NULL;

			for (i = 0; i < ARRAY_SIZE(ring_str_map); i++) {
				if (!strcasecmp(field, ring_str_map[i])) {
					step.engine = i;
					if (step.engine == BCS)
						bcs_used = true;
					valid++;
					break;
				}
			}

			if (old_valid == valid) {
				if (verbose)
					fprintf(stderr,
						"Invalid engine id at step %u!\n",
						nr_steps);
				return NULL;
			}
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			char *sep = NULL;
			long int tmpl;

			fstart = NULL;

			tmpl = strtol(field, &sep, 10);
			if (tmpl == 0 || tmpl == LONG_MIN || tmpl == LONG_MAX) {
				if (verbose)
					fprintf(stderr,
						"Invalid duration at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.duration.min = tmpl;

			if (sep && *sep == '-') {
				tmpl = strtol(sep + 1, NULL, 10);
				if (tmpl == 0 ||
				    tmpl == LONG_MIN || tmpl == LONG_MAX) {
					if (verbose)
						fprintf(stderr,
							"Invalid duration range at step %u!\n",
							nr_steps);
					return NULL;
				}
				step.duration.max = tmpl;
			} else {
				step.duration.max = step.duration.min;
			}

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			tmp = parse_dependencies(nr_steps, &step, field);
			if (tmp < 0) {
				if (verbose)
					fprintf(stderr,
						"Invalid dependency at step %u!\n",
						nr_steps);
				return NULL;
			}

			valid++;
		}

		if ((field = strtok_r(fstart, ".", &fctx)) != NULL) {
			fstart = NULL;

			if (strlen(field) != 1 ||
			    (field[0] != '0' && field[0] != '1')) {
				if (verbose)
					fprintf(stderr,
						"Invalid wait boolean at step %u!\n",
						nr_steps);
				return NULL;
			}
			step.wait = field[0] - '0';

			valid++;
		}

		if (valid != 5) {
			if (verbose)
				fprintf(stderr, "Invalid record at step %u!\n",
					nr_steps);
			return NULL;
		}

		step.type = BATCH;

add_step:
		step.idx = nr_steps++;
		steps = realloc(steps, sizeof(step) * nr_steps);
		igt_assert(steps);

		memcpy(&steps[nr_steps - 1], &step, sizeof(step));

		free(token);
	}

	wrk = malloc(sizeof(*wrk));
	igt_assert(wrk);

	wrk->nr_steps = nr_steps;
	wrk->steps = steps;

	free(desc);

	if (bcs_used && (flags & VCS2REMAP) && verbose)
		printf("BCS usage in workload with VCS2 remapping enabled!\n");

	return wrk;
}

static struct workload *
clone_workload(struct workload *_wrk)
{
	struct workload *wrk;
	int i;

	wrk = malloc(sizeof(*wrk));
	igt_assert(wrk);
	memset(wrk, 0, sizeof(*wrk));

	wrk->nr_steps = _wrk->nr_steps;
	wrk->steps = calloc(wrk->nr_steps, sizeof(struct w_step));
	igt_assert(wrk->steps);

	memcpy(wrk->steps, _wrk->steps, sizeof(struct w_step) * wrk->nr_steps);

	for (i = 0; i < NUM_ENGINES; i++)
		igt_list_init(&wrk->requests[i]);

	return wrk;
}

#define rounddown(x, y) (x - (x%y))
#ifndef PAGE_SIZE
#define PAGE_SIZE (4096)
#endif

static unsigned int get_duration(struct duration *dur)
{
	if (dur->min == dur->max)
		return dur->min;
	else
		return dur->min + hars_petruska_f54_1_random_unsafe() %
		       (dur->max + 1 - dur->min);
}

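/*
 * Convert a requested batch duration in microseconds into a buffer size in
 * bytes, using the measured rate at which the GPU executes nops.
 */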
static unsigned long get_bb_sz(unsigned int duration)
{
	return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
		     nop_calibration_us, sizeof(uint32_t));
}

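/*
 * Write the tail of a batch buffer: the optional SEQNO and RT (timestamp)
 * dword writes, followed by the batch buffer end. Records the CPU addresses
 * of the emitted values so they can be patched before every execbuf.
 */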
static void
terminate_bb(struct w_step *w, unsigned int flags)
{
	const uint32_t bbe = 0xa << 23;
	unsigned long mmap_start, mmap_len;
	unsigned long batch_start = w->bb_sz;
	uint32_t *ptr, *cs;

	igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT));

	batch_start -= sizeof(uint32_t); /* bbend */
	if (flags & SEQNO)
		batch_start -= 4 * sizeof(uint32_t);
	if (flags & RT)
		batch_start -= 12 * sizeof(uint32_t);

	mmap_start = rounddown(batch_start, PAGE_SIZE);
	mmap_len = w->bb_sz - mmap_start;

	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);

	if (flags & SEQNO) {
		w->reloc[0].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->seqno_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->seqno_value = cs;
		*cs++ = 0;
	}

	if (flags & RT) {
		w->reloc[1].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->rt0_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->rt0_value = cs;
		*cs++ = 0;

		w->reloc[2].offset = batch_start + 2 * sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
		*cs++ = RCS_TIMESTAMP;
		w->rt1_address = cs;
		*cs++ = 0;
		*cs++ = 0;

		w->reloc[3].offset = batch_start + sizeof(uint32_t);
		batch_start += 4 * sizeof(uint32_t);

		*cs++ = MI_STORE_DWORD_IMM;
		w->latch_address = cs;
		*cs++ = 0;
		*cs++ = 0;
		w->latch_value = cs;
		*cs++ = 0;
	}

	*cs = bbe;

	w->mapped_batch = ptr;
	w->mapped_len = mmap_len;
}

static const unsigned int eb_engine_map[NUM_ENGINES] = {
	[RCS] = I915_EXEC_RENDER,
	[BCS] = I915_EXEC_BLT,
	[VCS] = I915_EXEC_BSD,
	[VCS1] = I915_EXEC_BSD | I915_EXEC_BSD_RING1,
	[VCS2] = I915_EXEC_BSD | I915_EXEC_BSD_RING2,
	[VECS] = I915_EXEC_VEBOX
};

static void
eb_update_flags(struct w_step *w, enum intel_engine_id engine,
		unsigned int flags)
{
	if (engine == VCS2 && (flags & VCS2REMAP))
		engine = BCS;

	w->eb.flags = eb_engine_map[engine];

	w->eb.flags |= I915_EXEC_HANDLE_LUT;
	w->eb.flags |= I915_EXEC_NO_RELOC;
}

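/*
 * Build the execbuf for a batch step: the terminal write target, the shared
 * status page (for SEQNO), any dependency buffers and the batch itself.
 */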
static void
alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
{
	enum intel_engine_id engine = w->engine;
	unsigned int j = 0;
	unsigned int nr_obj = 3 + w->nr_deps;
	unsigned int i;

	w->obj = calloc(nr_obj, sizeof(*w->obj));
	igt_assert(w->obj);

	w->obj[j].handle = gem_create(fd, 4096);
	w->obj[j].flags = EXEC_OBJECT_WRITE;
	j++;
	igt_assert(j < nr_obj);

	if (flags & SEQNO) {
		w->obj[j].handle = wrk->status_page_handle;
		j++;
		igt_assert(j < nr_obj);
	}

	for (i = 0; i < w->nr_deps; i++) {
		igt_assert(w->dep[i] <= 0);
		if (w->dep[i]) {
			int dep_idx = w->idx + w->dep[i];

			igt_assert(dep_idx >= 0 && dep_idx < wrk->nr_steps);
			igt_assert(wrk->steps[dep_idx].type == BATCH);

			w->obj[j].handle = wrk->steps[dep_idx].obj[0].handle;
			j++;
			igt_assert(j < nr_obj);
		}
	}

	w->bb_sz = get_bb_sz(w->duration.max);
	w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz);
	terminate_bb(w, flags);

	if (flags & SEQNO) {
		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
		if (flags & RT)
			w->obj[j].relocation_count = 4;
		else
			w->obj[j].relocation_count = 1;
		for (i = 0; i < w->obj[j].relocation_count; i++)
			w->reloc[i].target_handle = 1;
	}

	w->eb.buffers_ptr = to_user_pointer(w->obj);
	w->eb.buffer_count = j + 1;
	w->eb.rsvd1 = wrk->ctx_id[w->context];

	if (flags & SWAPVCS && engine == VCS1)
		engine = VCS2;
	else if (flags & SWAPVCS && engine == VCS2)
		engine = VCS1;
	eb_update_flags(w, engine, flags);
#ifdef DEBUG
	printf("%u: %u:|", w->idx, w->eb.buffer_count);
	for (i = 0; i <= j; i++)
		printf("%x|", w->obj[i].handle);
	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
	       w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
	       wrk->ctx_id[w->context]);
#endif
}

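/*
 * Per-client preparation: seed the PRNG, create the status page and GEM
 * contexts on demand, then allocate a batch for every BATCH step.
 */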
static void
prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
{
	int max_ctx = -1;
	struct w_step *w;
	int i;

	wrk->prng = rand();

	if (flags & INITVCSRR)
		wrk->vcs_rr = id & 1;

	if (flags & SEQNO) {
		const unsigned int status_sz = sizeof(uint32_t);
		uint32_t handle = gem_create(fd, status_sz);

		gem_set_caching(fd, handle, I915_CACHING_CACHED);
		wrk->status_page_handle = handle;
		wrk->status_page = gem_mmap__cpu(fd, handle, 0, status_sz,
						 PROT_READ);
	}

	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
		if ((int)w->context > max_ctx) {
			int delta = w->context + 1 - wrk->nr_ctxs;

			wrk->nr_ctxs += delta;
			wrk->ctx_id = realloc(wrk->ctx_id,
					      wrk->nr_ctxs * sizeof(uint32_t));
			memset(&wrk->ctx_id[wrk->nr_ctxs - delta], 0,
			       delta * sizeof(uint32_t));

			max_ctx = w->context;
		}

		if (!wrk->ctx_id[w->context]) {
			struct drm_i915_gem_context_create arg = {};

			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
			igt_assert(arg.ctx_id);

			wrk->ctx_id[w->context] = arg.ctx_id;
		}
	}

	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
		unsigned int _flags = flags;
		enum intel_engine_id engine = w->engine;

		if (w->type != BATCH)
			continue;

		if (engine != VCS && engine != VCS1 && engine != VCS2)
			_flags &= ~(SEQNO | RT);

		if (engine == VCS)
			_flags &= ~SWAPVCS;

		alloc_step_batch(wrk, w, _flags);
	}
}

static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return (end->tv_sec - start->tv_sec) +
	       (end->tv_nsec - start->tv_nsec) / 1e9;
}

static int elapsed_us(const struct timespec *start, const struct timespec *end)
{
	return elapsed(start, end) * 1e6;
}

static enum intel_engine_id get_vcs_engine(unsigned int n)
{
	const enum intel_engine_id vcs_engines[2] = { VCS1, VCS2 };

	igt_assert(n < ARRAY_SIZE(vcs_engines));

	return vcs_engines[n];
}

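/*
 * A load balancer picks the VCS engine for each batch step. The optional
 * get_qd() hook reports the current queue depth for an engine, which the
 * balance() hook may use to make its decision.
 */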
struct workload_balancer {
	unsigned int (*get_qd)(const struct workload_balancer *balancer,
			       struct workload *wrk,
			       enum intel_engine_id engine);
	enum intel_engine_id (*balance)(const struct workload_balancer *balancer,
					struct workload *wrk, struct w_step *w);
};

static enum intel_engine_id
rr_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	unsigned int engine;

	engine = get_vcs_engine(wrk->vcs_rr);
	wrk->vcs_rr ^= 1;

	return engine;
}

static const struct workload_balancer rr_balancer = {
	.balance = rr_balance,
};

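/*
 * Queue depth is the distance between the last submitted seqno and the last
 * seqno the GPU has written back to the status page for that engine.
 */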
static unsigned int
get_qd_depth(const struct workload_balancer *balancer,
	     struct workload *wrk, enum intel_engine_id engine)
{
	return wrk->seqno[engine] -
	       wrk->status_page[VCS_SEQNO_IDX(engine)];
}

static enum intel_engine_id
qd_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	enum intel_engine_id engine;
	long qd[NUM_ENGINES];
	unsigned int n;

	igt_assert(w->engine == VCS);

	qd[VCS1] = balancer->get_qd(balancer, wrk, VCS1);
	wrk->qd_sum[VCS1] += qd[VCS1];

	qd[VCS2] = balancer->get_qd(balancer, wrk, VCS2);
	wrk->qd_sum[VCS2] += qd[VCS2];

	if (qd[VCS1] < qd[VCS2])
		n = 0;
	else if (qd[VCS2] < qd[VCS1])
		n = 1;
	else
		n = wrk->vcs_rr;

	engine = get_vcs_engine(n);
	wrk->vcs_rr = n ^ 1;

#ifdef DEBUG
	printf("qd_balance: 1:%ld 2:%ld rr:%u = %u\t(%lu - %u) (%lu - %u)\n",
	       qd[VCS1], qd[VCS2], wrk->vcs_rr, engine,
	       wrk->seqno[VCS1], wrk->status_page[VCS_SEQNO_IDX(VCS1)],
	       wrk->seqno[VCS2], wrk->status_page[VCS_SEQNO_IDX(VCS2)]);
#endif
	return engine;
}

static const struct workload_balancer qd_balancer = {
	.get_qd = get_qd_depth,
	.balance = qd_balance,
};

static enum intel_engine_id
__rt_select_engine(struct workload *wrk, unsigned long *qd, bool random)
{
	unsigned int n;

	qd[VCS1] >>= 10;
	qd[VCS2] >>= 10;

	if (qd[VCS1] < qd[VCS2])
		n = 0;
	else if (qd[VCS2] < qd[VCS1])
		n = 1;
	else if (random)
		n = hars_petruska_f54_1_random(&wrk->prng) & 1;
	else
		n = wrk->vcs_rr;
	wrk->vcs_rr = n ^ 1;

	return get_vcs_engine(n);
}

struct rt_depth {
	uint32_t seqno;
	uint32_t submitted;
	uint32_t completed;
};

#define READ_ONCE(x) (*(volatile typeof(x) *)(&(x)))

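/*
 * Take a consistent snapshot of an engine's status page. The GPU writes the
 * latch dword last, so re-read until the latch matches the seqno to avoid
 * tearing between the individual dword writes.
 */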
static void get_rt_depth(struct workload *wrk,
			 unsigned int engine,
			 struct rt_depth *rt)
{
	const unsigned int idx = VCS_SEQNO_IDX(engine);
	uint32_t latch;

	do {
		latch = READ_ONCE(wrk->status_page[idx + 3]);

		rt->submitted = wrk->status_page[idx + 1];
		rt->completed = wrk->status_page[idx + 2];
		rt->seqno = READ_ONCE(wrk->status_page[idx]);
	} while (latch != rt->seqno);
}

static enum intel_engine_id
__rt_balance(const struct workload_balancer *balancer,
	     struct workload *wrk, struct w_step *w, bool random)
{
	unsigned long qd[NUM_ENGINES];
	struct rt_depth results;

	igt_assert(w->engine == VCS);

	/* Estimate the "speed" of the most recent batch
	 *    (finish time - submit time)
	 * and use that as an approximate for the total remaining time for
	 * all batches on that engine, plus the time we expect this batch to
	 * take. We try to keep the total balanced between the engines.
	 */
	get_rt_depth(wrk, VCS1, &results);
	qd[VCS1] = wrk->seqno[VCS1] - results.seqno;
	wrk->qd_sum[VCS1] += qd[VCS1];
	qd[VCS1] = (qd[VCS1] + 1) * (results.completed - results.submitted);
#ifdef DEBUG
	printf("qd[0] = %d (%d - %d) x %d (%d - %d) = %ld\n",
	       wrk->seqno[VCS1] - wrk->status_page[0],
	       wrk->seqno[VCS1], wrk->status_page[0],
	       wrk->status_page[2] - wrk->status_page[1],
	       wrk->status_page[2], wrk->status_page[1],
	       qd[VCS1]);
#endif

	get_rt_depth(wrk, VCS2, &results);
	qd[VCS2] = wrk->seqno[VCS2] - results.seqno;
	wrk->qd_sum[VCS2] += qd[VCS2];
	qd[VCS2] = (qd[VCS2] + 1) * (results.completed - results.submitted);
#ifdef DEBUG
	printf("qd[1] = %d (%d - %d) x %d (%d - %d) = %ld\n",
	       wrk->seqno[VCS2] - wrk->status_page[16],
	       wrk->seqno[VCS2], wrk->status_page[16],
	       wrk->status_page[18] - wrk->status_page[17],
	       wrk->status_page[18], wrk->status_page[17],
	       qd[VCS2]);
#endif

	return __rt_select_engine(wrk, qd, random);
}

static enum intel_engine_id
rt_balance(const struct workload_balancer *balancer,
	   struct workload *wrk, struct w_step *w)
{
	return __rt_balance(balancer, wrk, w, false);
}

static const struct workload_balancer rt_balancer = {
	.get_qd = get_qd_depth,
	.balance = rt_balance,
};

static enum intel_engine_id
rtr_balance(const struct workload_balancer *balancer,
	    struct workload *wrk, struct w_step *w)
{
	return __rt_balance(balancer, wrk, w, true);
}

static const struct workload_balancer rtr_balancer = {
	.get_qd = get_qd_depth,
	.balance = rtr_balance,
};

static enum intel_engine_id
rtavg_balance(const struct workload_balancer *balancer,
	      struct workload *wrk, struct w_step *w)
{
	unsigned long qd[NUM_ENGINES];
	struct rt_depth results;

	igt_assert(w->engine == VCS);

	/* Estimate the average "speed" of the most recent batches
	 *    (finish time - submit time)
	 * and use that as an approximate for the total remaining time for
	 * all batches on that engine plus the time we expect to execute in.
	 * We try to keep the total remaining balanced between the engines.
	 */
	get_rt_depth(wrk, VCS1, &results);
	if (results.seqno != wrk->rt.last[VCS1]) {
		igt_assert((long)(results.completed - results.submitted) > 0);
		ewma_rt_add(&wrk->rt.avg[VCS1],
			    results.completed - results.submitted);
		wrk->rt.last[VCS1] = results.seqno;
	}
	qd[VCS1] = wrk->seqno[VCS1] - results.seqno;
	wrk->qd_sum[VCS1] += qd[VCS1];
	qd[VCS1] = (qd[VCS1] + 1) * ewma_rt_read(&wrk->rt.avg[VCS1]);

#ifdef DEBUG
	printf("qd[0] = %d (%d - %d) x %ld (%d) = %ld\n",
	       wrk->seqno[VCS1] - wrk->status_page[0],
	       wrk->seqno[VCS1], wrk->status_page[0],
	       ewma_rt_read(&wrk->rt.avg[VCS1]),
	       wrk->status_page[2] - wrk->status_page[1],
	       qd[VCS1]);
#endif

	get_rt_depth(wrk, VCS2, &results);
	if (results.seqno != wrk->rt.last[VCS2]) {
		igt_assert((long)(results.completed - results.submitted) > 0);
		ewma_rt_add(&wrk->rt.avg[VCS2],
			    results.completed - results.submitted);
		wrk->rt.last[VCS2] = results.seqno;
	}
	qd[VCS2] = wrk->seqno[VCS2] - results.seqno;
	wrk->qd_sum[VCS2] += qd[VCS2];
	qd[VCS2] = (qd[VCS2] + 1) * ewma_rt_read(&wrk->rt.avg[VCS2]);

#ifdef DEBUG
	printf("qd[1] = %d (%d - %d) x %ld (%d) = %ld\n",
	       wrk->seqno[VCS2] - wrk->status_page[16],
	       wrk->seqno[VCS2], wrk->status_page[16],
	       ewma_rt_read(&wrk->rt.avg[VCS2]),
	       wrk->status_page[18] - wrk->status_page[17],
	       qd[VCS2]);
#endif

	return __rt_select_engine(wrk, qd, false);
}

static const struct workload_balancer rtavg_balancer = {
	.get_qd = get_qd_depth,
	.balance = rtavg_balance,
};

static void
update_bb_seqno(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
{
	igt_assert(engine == VCS1 || engine == VCS2);

	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	w->reloc[0].delta = VCS_SEQNO_OFFSET(engine);

	*w->seqno_value = seqno;
	*w->seqno_address = w->reloc[0].presumed_offset + w->reloc[0].delta;

	/* If not using NO_RELOC, force the relocations */
	if (!(w->eb.flags & I915_EXEC_NO_RELOC))
		w->reloc[0].presumed_offset = -1;
}

static void
update_bb_rt(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
{
	igt_assert(engine == VCS1 || engine == VCS2);

	gem_set_domain(fd, w->bb_handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

	w->reloc[1].delta = VCS_SEQNO_OFFSET(engine) + sizeof(uint32_t);
	w->reloc[2].delta = VCS_SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t);
	w->reloc[3].delta = VCS_SEQNO_OFFSET(engine) + 3 * sizeof(uint32_t);

	*w->latch_value = seqno;
	*w->latch_address = w->reloc[3].presumed_offset + w->reloc[3].delta;

	*w->rt0_value = *REG(RCS_TIMESTAMP);
	*w->rt0_address = w->reloc[1].presumed_offset + w->reloc[1].delta;
	*w->rt1_address = w->reloc[2].presumed_offset + w->reloc[2].delta;

	/* If not using NO_RELOC, force the relocations */
	if (!(w->eb.flags & I915_EXEC_NO_RELOC)) {
		w->reloc[1].presumed_offset = -1;
		w->reloc[2].presumed_offset = -1;
	}
}

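/*
 * Wait for the batch at (or the closest batch before) the given step index
 * to complete; negative targets are relative to the end of the workload.
 */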
static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
{
	if (target < 0)
		target = wrk->nr_steps + target;

	igt_assert(target < wrk->nr_steps);

	while (wrk->steps[target].type != BATCH) {
		if (--target < 0)
			target = wrk->nr_steps + target;
	}

	igt_assert(target < wrk->nr_steps);
	igt_assert(wrk->steps[target].type == BATCH);

	gem_sync(fd, wrk->steps[target].obj[0].handle);
}

static void init_status_page(struct workload *wrk)
{
	struct drm_i915_gem_exec_object2 obj[2] = {};
	struct drm_i915_gem_relocation_entry reloc[4] = {};
	struct drm_i915_gem_execbuffer2 eb = {
		.buffer_count = 2, .buffers_ptr = to_user_pointer(obj)
	};
	uint32_t *base;

	/* Want to make sure that the balancer has a reasonable view of
	 * the background busyness of each engine. To do that we occasionally
	 * send a dummy batch down the pipeline.
	 */

	if (!wrk->status_page_handle)
		return;

	obj[0].handle = wrk->status_page_handle;

	/* As the expected offset is untracked, do a quick nop to query it */
	obj[1].handle = gem_create(fd, 4096);
	base = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);
	gem_set_domain(fd, obj[1].handle,
		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
	*base = MI_BATCH_BUFFER_END;
	gem_execbuf(fd, &eb);

	obj[1].relocs_ptr = to_user_pointer(reloc);
	obj[1].relocation_count = ARRAY_SIZE(reloc);

	reloc[0].offset = sizeof(uint32_t);
	reloc[1].offset = 5 * sizeof(uint32_t);
	reloc[2].offset = 10 * sizeof(uint32_t);
	reloc[3].offset = 13 * sizeof(uint32_t);

	for (int engine = VCS1; engine <= VCS2; engine++) {
		uint32_t *cs = base + engine * 128 / sizeof(*cs);
		uint64_t addr;

		reloc[0].delta = VCS_SEQNO_OFFSET(engine);
		reloc[0].presumed_offset = obj[0].offset;
		addr = reloc[0].presumed_offset + reloc[0].delta;
		*cs++ = MI_STORE_DWORD_IMM;
		*cs++ = addr;
		*cs++ = addr >> 32;
		*cs++ = ++wrk->seqno[engine];

		reloc[1].delta = VCS_SEQNO_OFFSET(engine) + sizeof(uint32_t);
		reloc[1].presumed_offset = obj[0].offset;
		addr = reloc[1].presumed_offset + reloc[1].delta;
		*cs++ = MI_STORE_DWORD_IMM;
		*cs++ = addr;
		*cs++ = addr >> 32;
		*cs++ = *REG(RCS_TIMESTAMP);

		reloc[2].delta = VCS_SEQNO_OFFSET(engine) + 2*sizeof(uint32_t);
		reloc[2].presumed_offset = obj[0].offset;
		addr = reloc[2].presumed_offset + reloc[2].delta;
		*cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
		*cs++ = RCS_TIMESTAMP;
		*cs++ = addr;
		*cs++ = addr >> 32;

		reloc[3].delta = VCS_SEQNO_OFFSET(engine) + 3*sizeof(uint32_t);
		reloc[3].presumed_offset = obj[0].offset;
		addr = reloc[3].presumed_offset + reloc[3].delta;
		*cs++ = MI_STORE_DWORD_IMM;
		*cs++ = addr;
		*cs++ = addr >> 32;
		*cs++ = wrk->seqno[engine];

		*cs++ = MI_BATCH_BUFFER_END;

		eb.flags = eb_engine_map[engine];
		eb.flags |= I915_EXEC_HANDLE_LUT;
		eb.flags |= I915_EXEC_NO_RELOC;

		eb.batch_start_offset = 128 * engine;

		gem_execbuf(fd, &eb);
	}

	munmap(base, 4096);
	gem_close(fd, obj[1].handle);
}

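/*
 * Main per-client loop: replay the parsed steps, balancing VCS batches,
 * honouring delays, periods, syncs and both throttling modes, until the
 * repeat count is reached or the master workload hangs up the pipe.
 */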
static void
run_workload(unsigned int id, struct workload *wrk,
	     bool background, int pipe_fd,
	     const struct workload_balancer *balancer,
	     unsigned int repeat,
	     unsigned int flags)
{
	struct timespec t_start, t_end;
	struct w_step *w;
	bool run = true;
	int throttle = -1;
	int qd_throttle = -1;
	int i, j;

	clock_gettime(CLOCK_MONOTONIC, &t_start);

	hars_petruska_f54_1_random_seed((flags & SYNCEDCLIENTS) ? 0 : id);

	for (j = 0; run && (background || j < repeat); j++) {
		clock_gettime(CLOCK_MONOTONIC, &wrk->repeat_start);
		init_status_page(wrk);

		for (i = 0, w = wrk->steps; run && (i < wrk->nr_steps);
		     i++, w++) {
			enum intel_engine_id engine = w->engine;
			int do_sleep = 0;

			if (w->type == DELAY) {
				do_sleep = w->wait;
			} else if (w->type == PERIOD) {
				struct timespec now;

				clock_gettime(CLOCK_MONOTONIC, &now);
				do_sleep = w->wait -
					   elapsed_us(&wrk->repeat_start, &now);
				if (do_sleep < 0) {
					if (verbose > 1)
						printf("%u: Dropped period @ %u/%u (%dus late)!\n",
						       id, j, i, do_sleep);
					continue;
				}
			} else if (w->type == SYNC) {
				unsigned int s_idx = i + w->wait;

				igt_assert(i > 0 && i < wrk->nr_steps);
				igt_assert(wrk->steps[s_idx].type == BATCH);
				gem_sync(fd, wrk->steps[s_idx].obj[0].handle);
				continue;
			} else if (w->type == THROTTLE) {
				throttle = w->wait;
				continue;
			} else if (w->type == QD_THROTTLE) {
				qd_throttle = w->wait;
				continue;
			}

			if (do_sleep) {
				usleep(do_sleep);
				continue;
			}

			wrk->nr_bb[engine]++;

			if (engine == VCS && balancer) {
				engine = balancer->balance(balancer, wrk, w);
				wrk->nr_bb[engine]++;

				eb_update_flags(w, engine, flags);

				if (flags & SEQNO)
					update_bb_seqno(w, engine,
							++wrk->seqno[engine]);
				if (flags & RT)
					update_bb_rt(w, engine,
						     wrk->seqno[engine]);
			}

			if (w->duration.min != w->duration.max) {
				unsigned int d = get_duration(&w->duration);
				unsigned long offset;

				offset = ALIGN(w->bb_sz - get_bb_sz(d),
					       2 * sizeof(uint32_t));
				w->eb.batch_start_offset = offset;
			}

			if (throttle > 0)
				w_sync_to(wrk, w, i - throttle);

			gem_execbuf(fd, &w->eb);
			igt_list_add_tail(&w->rq_link, &wrk->requests[engine]);
			wrk->nrequest[engine]++;

			if (pipe_fd >= 0) {
				struct pollfd fds;

				fds.fd = pipe_fd;
				fds.events = POLLHUP;
				if (poll(&fds, 1, 0)) {
					run = false;
					break;
				}
			}

			if (w->wait)
				gem_sync(fd, w->obj[0].handle);

			if (qd_throttle > 0) {
				while (wrk->nrequest[engine] > qd_throttle) {
					struct w_step *s;

					s = igt_list_first_entry(&wrk->requests[engine],
								 s, rq_link);

					gem_sync(fd, s->obj[0].handle);

					igt_list_del(&s->rq_link);
					wrk->nrequest[engine]--;
				}
			}
		}
	}

	for (i = 0; i < NUM_ENGINES; i++) {
		if (!wrk->nrequest[i])
			continue;

		w = igt_list_last_entry(&wrk->requests[i], w, rq_link);
		gem_sync(fd, w->obj[0].handle);
	}

	clock_gettime(CLOCK_MONOTONIC, &t_end);

	if (verbose > 1) {
		double t = elapsed(&t_start, &t_end);

		if (!balancer)
			printf("%c%u: %.3fs elapsed (%.3f workloads/s)\n",
			       background ? ' ' : '*', id, t, repeat / t);
		else if (!balancer->get_qd)
			printf("%c%u: %.3fs elapsed (%.3f workloads/s). %lu (%lu + %lu) total VCS batches.\n",
			       background ? ' ' : '*', id, t, repeat / t,
			       wrk->nr_bb[VCS], wrk->nr_bb[VCS1], wrk->nr_bb[VCS2]);
		else
			printf("%c%u: %.3fs elapsed (%.3f workloads/s). %lu (%lu + %lu) total VCS batches. Average queue depths %.3f, %.3f.\n",
			       background ? ' ' : '*', id, t, repeat / t,
			       wrk->nr_bb[VCS], wrk->nr_bb[VCS1], wrk->nr_bb[VCS2],
			       (double)wrk->qd_sum[VCS1] / wrk->nr_bb[VCS],
			       (double)wrk->qd_sum[VCS2] / wrk->nr_bb[VCS]);
	}
}

static void fini_workload(struct workload *wrk)
{
	free(wrk->steps);
	free(wrk);
}

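/*
 * Measure how large a nop-filled batch must be to keep the GPU busy for the
 * calibration period, iterating until the estimate settles within the given
 * tolerance. Returns the batch size in dwords.
 */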
static unsigned long calibrate_nop(unsigned int tolerance_pct)
{
	const uint32_t bbe = 0xa << 23;
	unsigned int loops = 17;
	unsigned int usecs = nop_calibration_us;
	struct drm_i915_gem_exec_object2 obj = {};
	struct drm_i915_gem_execbuffer2 eb =
		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
	long size, last_size;
	struct timespec t_0, t_end;

	clock_gettime(CLOCK_MONOTONIC, &t_0);

	size = 256 * 1024;
	do {
		struct timespec t_start;

		obj.handle = gem_create(fd, size);
		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
			  sizeof(bbe));
		gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);

		clock_gettime(CLOCK_MONOTONIC, &t_start);
		for (int loop = 0; loop < loops; loop++)
			gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);
		clock_gettime(CLOCK_MONOTONIC, &t_end);

		gem_close(fd, obj.handle);

		last_size = size;
		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
		size = ALIGN(size, sizeof(uint32_t));
	} while (elapsed(&t_0, &t_end) < 5 ||
		 abs(size - last_size) > (size * tolerance_pct / 100));

	return size / sizeof(uint32_t);
}

1339{
1340 puts(
1341"Usage: gem_wsim [OPTIONS]\n"
1342"\n"
1343"Runs a simulated workload on the GPU.\n"
1344"When ran without arguments performs a GPU calibration result of which needs\n"
1345"to be provided when running the simulation in subsequent invocations.\n"
1346"\n"
1347"Options:\n"
1348" -h This text.\n"
1349" -q Be quiet - do not output anything to stdout.\n"
1350" -n <n> Nop calibration value.\n"
1351" -t <n> Nop calibration tolerance percentage.\n"
1352" Use when there is a difficulty obtaining calibration\n"
1353" with the default settings.\n"
1354" -w <desc|path> Filename or a workload descriptor.\n"
1355" Can be given multiple times.\n"
1356" -W <desc|path> Filename or a master workload descriptor.\n"
1357" Only one master workload can be optinally specified\n"
1358" in which case all other workloads become background\n"
1359" ones and run as long as the master.\n"
1360" -r <n> How many times to emit the workload.\n"
1361" -c <n> Fork N clients emitting the workload simultaneously.\n"
1362" -x Swap VCS1 and VCS2 engines in every other client.\n"
Tvrtko Ursulin81116532017-05-08 18:37:03 +01001363" -b <n> Load balancing to use. (0: rr, 1: qd, 2: rt, 3: rtr)\n"
Tvrtko Ursulin0e0eca32017-05-09 10:01:22 +01001364" Balancers can be specified either as names or as their\n"
1365" id numbers as listed above.\n"
Tvrtko Ursulin7736d7e2017-05-09 09:21:03 +01001366" -2 Remap VCS2 to BCS.\n"
1367" -R Round-robin initial VCS assignment per client.\n"
Tvrtko Ursulin8540b912017-05-09 09:39:17 +01001368" -S Synchronize the sequence of random batch durations\n"
1369" between clients.\n"
Tvrtko Ursulin054eb1a2017-03-30 14:32:29 +01001370 );
1371}
1372
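/*
 * If the argument names a regular file, read it and flatten newlines into
 * commas so that file-based and inline descriptors share the same parser;
 * otherwise return the argument itself as an inline descriptor.
 */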
static char *load_workload_descriptor(char *filename)
{
	struct stat sbuf;
	char *buf;
	int infd, ret, i;
	ssize_t len;

	ret = stat(filename, &sbuf);
	if (ret || !S_ISREG(sbuf.st_mode))
		return filename;

	igt_assert(sbuf.st_size < 1024 * 1024); /* Just so. */
	buf = malloc(sbuf.st_size);
	igt_assert(buf);

	infd = open(filename, O_RDONLY);
	igt_assert(infd >= 0);
	len = read(infd, buf, sbuf.st_size);
	igt_assert(len == sbuf.st_size);
	close(infd);

	for (i = 0; i < len; i++) {
		if (buf[i] == '\n')
			buf[i] = ',';
	}

	len--;
	while (buf[len] == ',')
		buf[len--] = 0;

	return buf;
}

static char **
add_workload_arg(char **w_args, unsigned int nr_args, char *w_arg)
{
	w_args = realloc(w_args, sizeof(char *) * nr_args);
	igt_assert(w_args);
	w_args[nr_args - 1] = w_arg;

	return w_args;
}

static int parse_balancing_mode(char *str)
{
	const char *modes[] = { "rr", "qd", "rt", "rtr", "rtavg" };
	int mode = -1;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(modes); i++) {
		if (!strcasecmp(str, modes[i])) {
			mode = i;
			break;
		}
	}

	return mode;
}

static void init_clocks(void)
{
	struct timespec t_start, t_end;
	uint32_t rcs_start, rcs_end;
	double overhead, t;

	intel_register_access_init(intel_get_pci_device(), false, fd);

	if (verbose <= 1)
		return;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	for (int i = 0; i < 100; i++)
		rcs_start = *REG(RCS_TIMESTAMP);
	clock_gettime(CLOCK_MONOTONIC, &t_end);
	overhead = 2 * elapsed(&t_start, &t_end) / 100;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	for (int i = 0; i < 100; i++)
		clock_gettime(CLOCK_MONOTONIC, &t_end);
	clock_gettime(CLOCK_MONOTONIC, &t_end);
	overhead += elapsed(&t_start, &t_end) / 100;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	rcs_start = *REG(RCS_TIMESTAMP);
	usleep(100);
	rcs_end = *REG(RCS_TIMESTAMP);
	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = elapsed(&t_start, &t_end) - overhead;
	printf("%d cycles in %.1fus, i.e. 1024 cycles takes %.1fus\n",
	       rcs_end - rcs_start, 1e6*t, 1024e6 * t / (rcs_end - rcs_start));
}

int main(int argc, char **argv)
{
	unsigned int repeat = 1;
	unsigned int clients = 1;
	unsigned int flags = 0;
	struct timespec t_start, t_end;
	struct workload **w, **wrk = NULL;
	unsigned int nr_w_args = 0;
	int master_workload = -1;
	char **w_args = NULL;
	unsigned int tolerance_pct = 1;
	const struct workload_balancer *balancer = NULL;
	char *endptr = NULL;
	double t;
	int i, c;

	fd = drm_open_driver(DRIVER_INTEL);
	init_clocks();

	while ((c = getopt(argc, argv, "q2RSc:n:r:xw:W:t:b:vh")) != -1) {
		switch (c) {
		case 'W':
			if (master_workload >= 0) {
				if (verbose)
					fprintf(stderr,
						"Only one master workload can be given!\n");
				return 1;
			}
			master_workload = nr_w_args;
			/* Fall through */
		case 'w':
			w_args = add_workload_arg(w_args, ++nr_w_args, optarg);
			break;
		case 'c':
			clients = strtol(optarg, NULL, 0);
			break;
		case 't':
			tolerance_pct = strtol(optarg, NULL, 0);
			break;
		case 'n':
			nop_calibration = strtol(optarg, NULL, 0);
			break;
		case 'r':
			repeat = strtol(optarg, NULL, 0);
			break;
		case 'q':
			verbose = 0;
			break;
		case 'v':
			verbose++;
			break;
		case 'x':
			flags |= SWAPVCS;
			break;
		case '2':
			flags |= VCS2REMAP;
			break;
		case 'R':
			flags |= INITVCSRR;
			break;
		case 'S':
			flags |= SYNCEDCLIENTS;
			break;
		case 'b':
			i = parse_balancing_mode(optarg);
			if (i < 0) {
				i = strtol(optarg, &endptr, 0);
				if (endptr && *endptr)
					i = -1;
			}
			switch (i) {
			case 0:
				if (verbose > 1)
					printf("Using rr balancer\n");
				balancer = &rr_balancer;
				flags |= BALANCE;
				break;
			case 1:
				if (verbose > 1)
					printf("Using qd balancer\n");
				igt_assert(intel_gen(intel_get_drm_devid(fd)) >=
					   8);
				balancer = &qd_balancer;
				flags |= SEQNO | BALANCE;
				break;
			case 2:
				if (verbose > 1)
					printf("Using rt balancer\n");
				igt_assert(intel_gen(intel_get_drm_devid(fd)) >=
					   8);
				balancer = &rt_balancer;
				flags |= SEQNO | BALANCE | RT;
				break;
			case 3:
				if (verbose > 1)
					printf("Using rtr balancer\n");
				igt_assert(intel_gen(intel_get_drm_devid(fd)) >=
					   8);
				balancer = &rtr_balancer;
				flags |= SEQNO | BALANCE | RT;
				break;
			case 4:
				if (verbose > 1)
					printf("Using rtavg balancer\n");
				igt_assert(intel_gen(intel_get_drm_devid(fd)) >=
					   8);
				balancer = &rtavg_balancer;
				flags |= SEQNO | BALANCE | RT;
				break;
			default:
				if (verbose)
					fprintf(stderr,
						"Unknown balancing mode '%s'!\n",
						optarg);
				return 1;
			}
			break;
		case 'h':
			print_help();
			return 0;
		default:
			return 1;
		}
	}

	if (!nop_calibration) {
		if (verbose > 1)
			printf("Calibrating nop delay with %u%% tolerance...\n",
			       tolerance_pct);
		nop_calibration = calibrate_nop(tolerance_pct);
		if (verbose)
			printf("Nop calibration for %uus delay is %lu.\n",
			       nop_calibration_us, nop_calibration);

		return 0;
	}

	if (!nr_w_args) {
		if (verbose)
			fprintf(stderr, "No workload descriptor(s)!\n");
		return 1;
	}

	if (nr_w_args > 1 && clients > 1) {
		if (verbose)
			fprintf(stderr,
				"Cloned clients cannot be combined with multiple workloads!\n");
		return 1;
	}

	wrk = calloc(nr_w_args, sizeof(*wrk));
	igt_assert(wrk);

	for (i = 0; i < nr_w_args; i++) {
		w_args[i] = load_workload_descriptor(w_args[i]);
		if (!w_args[i]) {
			if (verbose)
				fprintf(stderr,
					"Failed to load workload descriptor %u!\n",
					i);
			return 1;
		}

		wrk[i] = parse_workload(w_args[i], flags);
		if (!wrk[i]) {
			if (verbose)
				fprintf(stderr,
					"Failed to parse workload %u!\n", i);
			return 1;
		}
	}

	if (nr_w_args > 1)
		clients = nr_w_args;

	if (verbose > 1) {
		printf("Using %lu nop calibration for %uus delay.\n",
		       nop_calibration, nop_calibration_us);
		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
		if (flags & SWAPVCS)
			printf("Swapping VCS rings between clients.\n");
	}

	if (master_workload >= 0 && clients == 1)
		master_workload = -1;

	w = calloc(clients, sizeof(struct workload *));
	igt_assert(w);

	for (i = 0; i < clients; i++) {
		unsigned int flags_ = flags;

		w[i] = clone_workload(wrk[nr_w_args > 1 ? i : 0]);

		if (master_workload >= 0) {
			int ret = pipe(w[i]->pipe);

			igt_assert(ret == 0);
		}

		if (flags & SWAPVCS && i & 1)
			flags_ &= ~SWAPVCS;

		prepare_workload(i, w[i], flags_);
	}

	clock_gettime(CLOCK_MONOTONIC, &t_start);

	igt_fork(child, clients) {
		int pipe_fd = -1;
		bool background = false;

		if (master_workload >= 0) {
			close(w[child]->pipe[0]);
			if (child != master_workload) {
				pipe_fd = w[child]->pipe[1];
				background = true;
			} else {
				close(w[child]->pipe[1]);
			}
		}

		run_workload(child, w[child], background, pipe_fd, balancer,
			     repeat, flags);
	}

	if (master_workload >= 0) {
		int status = -1;
		pid_t pid;

		for (i = 0; i < clients; i++)
			close(w[i]->pipe[1]);

		pid = wait(&status);
		if (pid >= 0)
			igt_child_done(pid);

		for (i = 0; i < clients; i++)
			close(w[i]->pipe[0]);
	}

	igt_waitchildren();

	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = elapsed(&t_start, &t_end);
	if (verbose)
		printf("%.3fs elapsed (%.3f workloads/s)\n",
		       t, clients * repeat / t);

	for (i = 0; i < clients; i++)
		fini_workload(w[i]);
	free(w);
	for (i = 0; i < nr_w_args; i++)
		fini_workload(wrk[i]);
	free(w_args);

	return 0;
}
1723}