/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "gpu_fill.h"

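/*
 * Upload the CPU-side batch contents into the batch bo and submit it for
 * execution, asserting that both steps succeed.
 */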
void
gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
{
	int ret;

	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
	if (ret == 0)
		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
					    NULL, 0, 0, 0);
	igt_assert(ret == 0);
}

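/*
 * Reserve a 64-byte-aligned, 8-dword CURBE block in the batch, store the
 * fill color in its first byte and return the block's offset within the
 * batch.
 */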
uint32_t
gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
			    uint8_t color)
{
	uint8_t *curbe_buffer;
	uint32_t offset;

	curbe_buffer = intel_batchbuffer_subdata_alloc(batch,
						       sizeof(uint32_t) * 8,
						       64);
	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
	*curbe_buffer = color;

	return offset;
}

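/*
 * Write a gen7 SURFACE_STATE for buf into the batch, with a relocation for
 * the surface base address, and return its offset. A destination surface
 * uses the render domain for both read and write; a source surface is read
 * through the sampler domain.
 */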
uint32_t
gen7_fill_surface_state(struct intel_batchbuffer *batch,
			struct igt_buf *buf,
			uint32_t format,
			int is_dst)
{
	struct gen7_surface_state *ss;
	uint32_t write_domain, read_domain, offset;
	int ret;

	if (is_dst) {
		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domain = I915_GEM_DOMAIN_SAMPLER;
	}

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
	offset = intel_batchbuffer_subdata_offset(batch, ss);

	ss->ss0.surface_type = GEN7_SURFACE_2D;
	ss->ss0.surface_format = format;
	ss->ss0.render_cache_read_write = 1;

	if (buf->tiling == I915_TILING_X)
		ss->ss0.tiled_mode = 2;
	else if (buf->tiling == I915_TILING_Y)
		ss->ss0.tiled_mode = 3;

	ss->ss1.base_addr = buf->bo->offset;
	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch, ss) + 4,
				      buf->bo, 0,
				      read_domain, write_domain);
	igt_assert(ret == 0);

	ss->ss2.height = igt_buf_height(buf) - 1;
	ss->ss2.width = igt_buf_width(buf) - 1;

	ss->ss3.pitch = buf->stride - 1;

	ss->ss7.shader_chanel_select_r = 4;
	ss->ss7.shader_chanel_select_g = 5;
	ss->ss7.shader_chanel_select_b = 6;
	ss->ss7.shader_chanel_select_a = 7;

	return offset;
}

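/*
 * Allocate a 32-byte binding table and fill entry 0 with an R8_UNORM
 * destination surface state (gen7 or gen8 layout, depending on the device),
 * returning the table's offset within the batch.
 */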
uint32_t
gen7_fill_binding_table(struct intel_batchbuffer *batch,
			struct igt_buf *dst)
{
	uint32_t *binding_table, offset;

	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
	if (IS_GEN7(batch->devid))
		binding_table[0] = gen7_fill_surface_state(batch, dst,
						GEN7_SURFACEFORMAT_R8_UNORM, 1);
	else
		binding_table[0] = gen8_fill_surface_state(batch, dst,
						GEN8_SURFACEFORMAT_R8_UNORM, 1);

	return offset;
}

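/*
 * Copy the kernel binary into the batch at 64-byte alignment and return its
 * offset.
 */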
uint32_t
gen7_fill_kernel(struct intel_batchbuffer *batch,
		 const uint32_t kernel[][4],
		 size_t size)
{
	uint32_t offset;

	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);

	return offset;
}

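/*
 * Build a gen7 interface descriptor in the batch, wiring up the binding
 * table and kernel emitted by the helpers above, and return its offset. The
 * kernel start pointer and binding table pointer are stored in 64- and
 * 32-byte units respectively, hence the shifts.
 */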
uint32_t
gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
			       const uint32_t kernel[][4], size_t size)
{
	struct gen7_interface_descriptor_data *idd;
	uint32_t offset;
	uint32_t binding_table_offset, kernel_offset;

	binding_table_offset = gen7_fill_binding_table(batch, dst);
	kernel_offset = gen7_fill_kernel(batch, kernel, size);

	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
	offset = intel_batchbuffer_subdata_offset(batch, idd);

	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);

	idd->desc1.single_program_flow = 1;
	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;

	idd->desc2.sampler_count = 0;	/* 0 samplers used */
	idd->desc2.sampler_state_pointer = 0;

	idd->desc3.binding_table_entry_count = 0;
	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);

	idd->desc4.constant_urb_entry_read_offset = 0;
	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */

	return offset;
}

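/*
 * Emit gen7 STATE_BASE_ADDRESS, pointing the surface, dynamic and
 * instruction state bases at the start of the batch bo so the offsets
 * returned by the fill helpers above can be used directly.
 */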
void
gen7_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));

	/* general */
	OUT_BATCH(0);

	/* surface */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* dynamic */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* indirect */
	OUT_BATCH(0);

	/* instruction */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* general/dynamic/indirect/instruction access upper bounds */
	OUT_BATCH(0);
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
}

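/*
 * Program MEDIA_VFE_STATE with the thread count and URB/CURBE allocation
 * used for MEDIA_OBJECT dispatch; the _gpgpu variant below selects GPGPU
 * mode instead.
 */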
void
gen7_emit_vfe_state(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));

	/* scratch buffer */
	OUT_BATCH(0);

	/* number of threads & urb entries */
	OUT_BATCH(1 << 16 |
		  2 << 8);

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(2 << 16 |	/* in 256-bit units */
		  2);		/* in 256-bit units */

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

void
gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));

	/* scratch buffer */
	OUT_BATCH(0);

	/* number of threads & urb entries */
	OUT_BATCH(1 << 16 |	/* max number of threads */
		  0 << 8 |	/* number of URB entries */
		  1 << 2);	/* GPGPU mode */

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(0 << 16 |	/* URB entry size in 256-bit units */
		  1);		/* CURBE entry size in 256-bit units */

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

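/*
 * Emit MEDIA_CURBE_LOAD for 64 bytes of CURBE data; the offset passed in is
 * interpreted relative to the dynamic state base address.
 */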
void
gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
{
	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
	OUT_BATCH(0);
	/* curbe total data length */
	OUT_BATCH(64);
	/* curbe data start address, relative to the dynamic state base address */
	OUT_BATCH(curbe_buffer);
}

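/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD for a single descriptor, sizing the
 * payload for the gen7 or gen8 descriptor layout as appropriate.
 */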
void
gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
{
	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
	OUT_BATCH(0);
	/* interface descriptor data length */
	if (IS_GEN7(batch->devid))
		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
	else
		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
	/* interface descriptor address, relative to the dynamic state base address */
	OUT_BATCH(interface_descriptor);
}

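/*
 * Emit one MEDIA_OBJECT per 16x16 block of the target rectangle, passing the
 * block's (x, y) origin as inline data, and follow each object with a
 * MEDIA_STATE_FLUSH on gen8+ except Cherryview.
 */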
void
gen7_emit_media_objects(struct intel_batchbuffer *batch,
			unsigned x, unsigned y,
			unsigned width, unsigned height)
{
	int i, j;

	for (i = 0; i < width / 16; i++) {
		for (j = 0; j < height / 16; j++) {
			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));

			/* interface descriptor offset */
			OUT_BATCH(0);

			/* without indirect data */
			OUT_BATCH(0);
			OUT_BATCH(0);

			/* scoreboard */
			OUT_BATCH(0);
			OUT_BATCH(0);

			/* inline data (xoffset, yoffset) */
			OUT_BATCH(x + i * 16);
			OUT_BATCH(y + j * 16);
			if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
				gen8_emit_media_state_flush(batch);
		}
	}
}

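/*
 * Emit a gen7 GPGPU_WALKER covering a width x height rectangle with SIMD16
 * threads; the right execution mask disables the channels that fall outside
 * the rectangle when width is not a multiple of 16.
 */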
void
gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
		     unsigned x, unsigned y,
		     unsigned width, unsigned height)
{
	uint32_t x_dim, y_dim, tmp, right_mask;

	/*
	 * Dispatch SIMD16 threads, so every thread uses all 16 SIMD channels.
	 *
	 * With a thread group size of 16x1, each group holds exactly one
	 * SIMD16 thread, so thread width/height/depth are all 1.
	 *
	 * The group counts are then:
	 *	thread group X = width / 16 (rounded up)
	 *	thread group Y = height
	 */
	x_dim = (width + 15) / 16;
	y_dim = height;

	tmp = width & 15;
	if (tmp == 0)
		right_mask = (1 << 16) - 1;
	else
		right_mask = (1 << tmp) - 1;

	OUT_BATCH(GEN7_GPGPU_WALKER | 9);

	/* interface descriptor offset */
	OUT_BATCH(0);

	/* SIMD size, thread w/h/d */
	OUT_BATCH(1 << 30 |	/* SIMD16 */
		  0 << 16 |	/* depth:1 */
		  0 << 8 |	/* height:1 */
		  0);		/* width:1 */

	/* thread group X */
	OUT_BATCH(0);
	OUT_BATCH(x_dim);

	/* thread group Y */
	OUT_BATCH(0);
	OUT_BATCH(y_dim);

	/* thread group Z */
	OUT_BATCH(0);
	OUT_BATCH(1);

	/* right mask */
	OUT_BATCH(right_mask);

	/* bottom mask, height 1, always 0xffffffff */
	OUT_BATCH(0xffffffff);
}

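/*
 * Reserve a 64-byte CURBE block whose first dword holds the iteration count
 * for the spin kernel, returning its offset within the batch.
 */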
uint32_t
gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
			    uint32_t iters)
{
	uint32_t *curbe_buffer;
	uint32_t offset;

	curbe_buffer = intel_batchbuffer_subdata_alloc(batch, 64, 64);
	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
	*curbe_buffer = iters;

	return offset;
}

uint32_t
gen8_fill_surface_state(struct intel_batchbuffer *batch,
			struct igt_buf *buf,
			uint32_t format,
			int is_dst)
{
	struct gen8_surface_state *ss;
	uint32_t write_domain, read_domain, offset;
	int ret;

	if (is_dst) {
		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domain = I915_GEM_DOMAIN_SAMPLER;
	}

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
	offset = intel_batchbuffer_subdata_offset(batch, ss);

	ss->ss0.surface_type = GEN8_SURFACE_2D;
	ss->ss0.surface_format = format;
	ss->ss0.render_cache_read_write = 1;
	ss->ss0.vertical_alignment = 1; /* align 4 */
	ss->ss0.horizontal_alignment = 1; /* align 4 */

	if (buf->tiling == I915_TILING_X)
		ss->ss0.tiled_mode = 2;
	else if (buf->tiling == I915_TILING_Y)
		ss->ss0.tiled_mode = 3;

	ss->ss8.base_addr = buf->bo->offset;

	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
				      buf->bo, 0,
				      read_domain, write_domain);
	igt_assert(ret == 0);

	ss->ss2.height = igt_buf_height(buf) - 1;
	ss->ss2.width = igt_buf_width(buf) - 1;
	ss->ss3.pitch = buf->stride - 1;

	ss->ss7.shader_chanel_select_r = 4;
	ss->ss7.shader_chanel_select_g = 5;
	ss->ss7.shader_chanel_select_b = 6;
	ss->ss7.shader_chanel_select_a = 7;

	return offset;
}

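/*
 * Gen8+ counterpart of gen7_fill_interface_descriptor(): the descriptor
 * fields sit in different dwords, and gen8 additionally programs the number
 * of threads per thread group.
 */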
uint32_t
gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst, const uint32_t kernel[][4], size_t size)
{
	struct gen8_interface_descriptor_data *idd;
	uint32_t offset;
	uint32_t binding_table_offset, kernel_offset;

	binding_table_offset = gen7_fill_binding_table(batch, dst);
	kernel_offset = gen7_fill_kernel(batch, kernel, size);

	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
	offset = intel_batchbuffer_subdata_offset(batch, idd);

	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);

	idd->desc2.single_program_flow = 1;
	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;

	idd->desc3.sampler_count = 0;	/* 0 samplers used */
	idd->desc3.sampler_state_pointer = 0;

	idd->desc4.binding_table_entry_count = 0;
	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);

	idd->desc5.constant_urb_entry_read_offset = 0;
	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */

	idd->desc6.num_threads_in_tg = 1;

	return offset;
}

void
gen8_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));

	/* general */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);

	/* stateless data port */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);

	/* surface */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);

	/* dynamic */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
		  0, BASE_ADDRESS_MODIFY);

	/* indirect */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* instruction */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* general state buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* dynamic state buffer size */
	OUT_BATCH(1 << 12 | 1);
	/* indirect object buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* instruction buffer size; the modify enable bit must be set, otherwise it may result in a GPU hang */
	OUT_BATCH(1 << 12 | 1);
}

void
gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
	OUT_BATCH(0);
}

void
gen8_emit_vfe_state(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));

	/* scratch buffer */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* number of threads & urb entries */
	OUT_BATCH(1 << 16 |
		  2 << 8);

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(2 << 16 |
		  2);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

void
gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));

	/* scratch buffer */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* number of threads & urb entries */
	OUT_BATCH(1 << 16 | 1 << 8);

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(0 << 16 | 1);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

void
gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));

	/* scratch buffer */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* number of threads & urb entries */
	OUT_BATCH(2 << 8);

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(2 << 16 |
		  2);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

void
gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
		     unsigned x, unsigned y,
		     unsigned width, unsigned height)
{
	uint32_t x_dim, y_dim, tmp, right_mask;

	/*
	 * Dispatch SIMD16 threads, so every thread uses all 16 SIMD channels.
	 *
	 * With a thread group size of 16x1, each group holds exactly one
	 * SIMD16 thread, so thread width/height/depth are all 1.
	 *
	 * The group counts are then:
	 *	thread group X = width / 16 (rounded up)
	 *	thread group Y = height
	 */
	x_dim = (width + 15) / 16;
	y_dim = height;

	tmp = width & 15;
	if (tmp == 0)
		right_mask = (1 << 16) - 1;
	else
		right_mask = (1 << tmp) - 1;

	OUT_BATCH(GEN7_GPGPU_WALKER | 13);

	OUT_BATCH(0); /* kernel offset */
	OUT_BATCH(0); /* indirect data length */
	OUT_BATCH(0); /* indirect data offset */

	/* SIMD size, thread w/h/d */
	OUT_BATCH(1 << 30 |	/* SIMD16 */
		  0 << 16 |	/* depth:1 */
		  0 << 8 |	/* height:1 */
		  0);		/* width:1 */

	/* thread group X */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(x_dim);

	/* thread group Y */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(y_dim);

	/* thread group Z */
	OUT_BATCH(0);
	OUT_BATCH(1);

	/* right mask */
	OUT_BATCH(right_mask);

	/* bottom mask, height 1, always 0xffffffff */
	OUT_BATCH(0xffffffff);
}

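/*
 * Emit a single MEDIA_OBJECT with zeroed inline data for the spin batch,
 * followed by a MEDIA_STATE_FLUSH. The gen8lp variant below is identical
 * except that it omits the flush (cf. the Cherryview exception in
 * gen7_emit_media_objects()).
 */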
void
gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));

	/* interface descriptor offset */
	OUT_BATCH(0);

	/* without indirect data */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* inline data (xoffset, yoffset) */
	OUT_BATCH(0);
	OUT_BATCH(0);
	gen8_emit_media_state_flush(batch);
}

void
gen8lp_emit_media_objects_spin(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));

	/* interface descriptor offset */
	OUT_BATCH(0);

	/* without indirect data */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* inline data (xoffset, yoffset) */
	OUT_BATCH(0);
	OUT_BATCH(0);
}

void
gen9_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));

	/* general */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);

	/* stateless data port */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);

	/* surface */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);

	/* dynamic */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
		  0, BASE_ADDRESS_MODIFY);

	/* indirect */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* instruction */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* general state buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* dynamic state buffer size */
	OUT_BATCH(1 << 12 | 1);
	/* indirect object buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* instruction buffer size; the modify enable bit must be set, otherwise it may result in a GPU hang */
	OUT_BATCH(1 << 12 | 1);

	/* Bindless surface state base address */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(0xfffff000);
}