Blame - lib/gpu_fill.c - platform/external/igt-gpu-tools

blob: f5fc61bbdb07c216bb55355db14454a99eebfa4d [file] [log] [blame]

Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	1	/*
				2	* Copyright © 2018 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	*/
				24
				25	#include "gpu_fill.h"
				26
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	27	void
				28	gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
				29	{
				30	int ret;
				31
				32	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
				33	if (ret == 0)
				34	ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
				35	NULL, 0, 0, 0);
				36	igt_assert(ret == 0);
				37	}
				38
				39	uint32_t
				40	gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
				41	uint8_t color)
				42	{
				43	uint8_t *curbe_buffer;
				44	uint32_t offset;
				45
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	46	curbe_buffer = intel_batchbuffer_subdata_alloc(batch,
				47	sizeof(uint32_t) * 8,
				48	64);
				49	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	50	*curbe_buffer = color;
				51
				52	return offset;
				53	}
				54
				55	uint32_t
				56	gen7_fill_surface_state(struct intel_batchbuffer *batch,
				57	struct igt_buf *buf,
				58	uint32_t format,
				59	int is_dst)
				60	{
				61	struct gen7_surface_state *ss;
				62	uint32_t write_domain, read_domain, offset;
				63	int ret;
				64
				65	if (is_dst) {
				66	write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
				67	} else {
				68	write_domain = 0;
				69	read_domain = I915_GEM_DOMAIN_SAMPLER;
				70	}
				71
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	72	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
				73	offset = intel_batchbuffer_subdata_offset(batch, ss);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	74
				75	ss->ss0.surface_type = GEN7_SURFACE_2D;
				76	ss->ss0.surface_format = format;
				77	ss->ss0.render_cache_read_write = 1;
				78
				79	if (buf->tiling == I915_TILING_X)
				80	ss->ss0.tiled_mode = 2;
				81	else if (buf->tiling == I915_TILING_Y)
				82	ss->ss0.tiled_mode = 3;
				83
				84	ss->ss1.base_addr = buf->bo->offset;
				85	ret = drm_intel_bo_emit_reloc(batch->bo,
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	86	intel_batchbuffer_subdata_offset(batch, ss) + 4,
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	87	buf->bo, 0,
				88	read_domain, write_domain);
				89	igt_assert(ret == 0);
				90
				91	ss->ss2.height = igt_buf_height(buf) - 1;
				92	ss->ss2.width = igt_buf_width(buf) - 1;
				93
				94	ss->ss3.pitch = buf->stride - 1;
				95
				96	ss->ss7.shader_chanel_select_r = 4;
				97	ss->ss7.shader_chanel_select_g = 5;
				98	ss->ss7.shader_chanel_select_b = 6;
				99	ss->ss7.shader_chanel_select_a = 7;
				100
				101	return offset;
				102	}
				103
				104	uint32_t
				105	gen7_fill_binding_table(struct intel_batchbuffer *batch,
				106	struct igt_buf *dst)
				107	{
				108	uint32_t *binding_table, offset;
				109
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	110	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
				111	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	112	if (IS_GEN7(batch->devid))
				113	binding_table[0] = gen7_fill_surface_state(batch, dst,
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	114	GEN7_SURFACEFORMAT_R8_UNORM, 1);
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	115	else
				116	binding_table[0] = gen8_fill_surface_state(batch, dst,
				117	GEN8_SURFACEFORMAT_R8_UNORM, 1);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	118
				119	return offset;
				120	}
				121
				122	uint32_t
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	123	gen7_fill_kernel(struct intel_batchbuffer *batch,
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	124	const uint32_t kernel[][4],
				125	size_t size)
				126	{
				127	uint32_t offset;
				128
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	129	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	130
				131	return offset;
				132	}
				133
				134	uint32_t
				135	gen7_fill_interface_descriptor(struct intel_batchbuffer batch, struct igt_buf dst,
				136	const uint32_t kernel[][4], size_t size)
				137	{
				138	struct gen7_interface_descriptor_data *idd;
				139	uint32_t offset;
				140	uint32_t binding_table_offset, kernel_offset;
				141
				142	binding_table_offset = gen7_fill_binding_table(batch, dst);
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	143	kernel_offset = gen7_fill_kernel(batch, kernel, size);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	144
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	145	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
				146	offset = intel_batchbuffer_subdata_offset(batch, idd);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	147
				148	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
				149
				150	idd->desc1.single_program_flow = 1;
				151	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
				152
				153	idd->desc2.sampler_count = 0; /* 0 samplers used */
				154	idd->desc2.sampler_state_pointer = 0;
				155
				156	idd->desc3.binding_table_entry_count = 0;
				157	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
				158
				159	idd->desc4.constant_urb_entry_read_offset = 0;
				160	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
				161
				162	return offset;
				163	}
				164
				165	void
				166	gen7_emit_state_base_address(struct intel_batchbuffer *batch)
				167	{
				168	OUT_BATCH(GEN7_STATE_BASE_ADDRESS \| (10 - 2));
				169
				170	/* general */
				171	OUT_BATCH(0);
				172
				173	/* surface */
				174	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
				175
				176	/* dynamic */
				177	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
				178
				179	/* indirect */
				180	OUT_BATCH(0);
				181
				182	/* instruction */
				183	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
				184
				185	/* general/dynamic/indirect/instruction access Bound */
				186	OUT_BATCH(0);
				187	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				188	OUT_BATCH(0);
				189	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				190	}
				191
				192	void
				193	gen7_emit_vfe_state(struct intel_batchbuffer *batch)
				194	{
				195	OUT_BATCH(GEN7_MEDIA_VFE_STATE \| (8 - 2));
				196
				197	/* scratch buffer */
				198	OUT_BATCH(0);
				199
				200	/* number of threads & urb entries */
				201	OUT_BATCH(1 << 16 \|
				202	2 << 8);
				203
				204	OUT_BATCH(0);
				205
				206	/* urb entry size & curbe size */
				207	OUT_BATCH(2 << 16 \| /* in 256 bits unit */
				208	2); /* in 256 bits unit */
				209
				210	/* scoreboard */
				211	OUT_BATCH(0);
				212	OUT_BATCH(0);
				213	OUT_BATCH(0);
				214	}
				215
				216	void
				217	gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
				218	{
				219	OUT_BATCH(GEN7_MEDIA_VFE_STATE \| (8 - 2));
				220
				221	/* scratch buffer */
				222	OUT_BATCH(0);
				223
				224	/* number of threads & urb entries */
				225	OUT_BATCH(1 << 16 \| /* max num of threads */
				226	0 << 8 \| /* num of URB entry */
				227	1 << 2); /* GPGPU mode */
				228
				229	OUT_BATCH(0);
				230
				231	/* urb entry size & curbe size */
				232	OUT_BATCH(0 << 16 \| /* URB entry size in 256 bits unit */
				233	1); /* CURBE entry size in 256 bits unit */
				234
				235	/* scoreboard */
				236	OUT_BATCH(0);
				237	OUT_BATCH(0);
				238	OUT_BATCH(0);
				239	}
				240
				241	void
				242	gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
				243	{
				244	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD \| (4 - 2));
				245	OUT_BATCH(0);
				246	/* curbe total data length */
				247	OUT_BATCH(64);
				248	/* curbe data start address, is relative to the dynamics base address */
				249	OUT_BATCH(curbe_buffer);
				250	}
				251
				252	void
				253	gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
				254	{
				255	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD \| (4 - 2));
				256	OUT_BATCH(0);
				257	/* interface descriptor data length */
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	258	if (IS_GEN7(batch->devid))
				259	OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
				260	else
				261	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	262	/* interface descriptor address, is relative to the dynamics base address */
				263	OUT_BATCH(interface_descriptor);
				264	}
				265
				266	void
				267	gen7_emit_media_objects(struct intel_batchbuffer *batch,
				268	unsigned x, unsigned y,
				269	unsigned width, unsigned height)
				270	{
				271	int i, j;
				272
				273	for (i = 0; i < width / 16; i++) {
				274	for (j = 0; j < height / 16; j++) {
				275	OUT_BATCH(GEN7_MEDIA_OBJECT \| (8 - 2));
				276
				277	/* interface descriptor offset */
				278	OUT_BATCH(0);
				279
				280	/* without indirect data */
				281	OUT_BATCH(0);
				282	OUT_BATCH(0);
				283
				284	/* scoreboard */
				285	OUT_BATCH(0);
				286	OUT_BATCH(0);
				287
				288	/* inline data (xoffset, yoffset) */
				289	OUT_BATCH(x + i * 16);
				290	OUT_BATCH(y + j * 16);
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	291	if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
				292	gen8_emit_media_state_flush(batch);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	293	}
				294	}
				295	}
				296
				297	void
				298	gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
				299	unsigned x, unsigned y,
				300	unsigned width, unsigned height)
				301	{
				302	uint32_t x_dim, y_dim, tmp, right_mask;
				303
				304	/*
				305	* Simply do SIMD16 based dispatch, so every thread uses
				306	* SIMD16 channels.
				307	*
				308	* Define our own thread group size, e.g 16x1 for every group, then
				309	* will have 1 thread each group in SIMD16 dispatch. So thread
				310	* width/height/depth are all 1.
				311	*
				312	* Then thread group X = width / 16 (aligned to 16)
				313	* thread group Y = height;
				314	*/
				315	x_dim = (width + 15) / 16;
				316	y_dim = height;
				317
				318	tmp = width & 15;
				319	if (tmp == 0)
				320	right_mask = (1 << 16) - 1;
				321	else
				322	right_mask = (1 << tmp) - 1;
				323
				324	OUT_BATCH(GEN7_GPGPU_WALKER \| 9);
				325
				326	/* interface descriptor offset */
				327	OUT_BATCH(0);
				328
				329	/* SIMD size, thread w/h/d */
				330	OUT_BATCH(1 << 30 \| /* SIMD16 */
				331	0 << 16 \| /* depth:1 */
				332	0 << 8 \| /* height:1 */
				333	0); /* width:1 */
				334
				335	/* thread group X */
				336	OUT_BATCH(0);
				337	OUT_BATCH(x_dim);
				338
				339	/* thread group Y */
				340	OUT_BATCH(0);
				341	OUT_BATCH(y_dim);
				342
				343	/* thread group Z */
				344	OUT_BATCH(0);
				345	OUT_BATCH(1);
				346
				347	/* right mask */
				348	OUT_BATCH(right_mask);
				349
				350	/* bottom mask, height 1, always 0xffffffff */
				351	OUT_BATCH(0xffffffff);
				352	}
				353
/*
 * Reserve a 64-byte, 64-byte-aligned region in the batch's sub-data area
 * and store @iters in its first dword.
 *
 * Returns the region's offset as reported by
 * intel_batchbuffer_subdata_offset().
 */
uint32_t
gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
			    uint32_t iters)
{
	uint32_t *curbe_buffer = intel_batchbuffer_subdata_alloc(batch, 64, 64);

	curbe_buffer[0] = iters;

	return intel_batchbuffer_subdata_offset(batch, curbe_buffer);
}
				367
				368	uint32_t
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	369	gen8_fill_surface_state(struct intel_batchbuffer *batch,
				370	struct igt_buf *buf,
				371	uint32_t format,
				372	int is_dst)
				373	{
				374	struct gen8_surface_state *ss;
				375	uint32_t write_domain, read_domain, offset;
				376	int ret;
				377
				378	if (is_dst) {
				379	write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
				380	} else {
				381	write_domain = 0;
				382	read_domain = I915_GEM_DOMAIN_SAMPLER;
				383	}
				384
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	385	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
				386	offset = intel_batchbuffer_subdata_offset(batch, ss);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	387
				388	ss->ss0.surface_type = GEN8_SURFACE_2D;
				389	ss->ss0.surface_format = format;
				390	ss->ss0.render_cache_read_write = 1;
				391	ss->ss0.vertical_alignment = 1; /* align 4 */
				392	ss->ss0.horizontal_alignment = 1; /* align 4 */
				393
				394	if (buf->tiling == I915_TILING_X)
				395	ss->ss0.tiled_mode = 2;
				396	else if (buf->tiling == I915_TILING_Y)
				397	ss->ss0.tiled_mode = 3;
				398
				399	ss->ss8.base_addr = buf->bo->offset;
				400
				401	ret = drm_intel_bo_emit_reloc(batch->bo,
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	402	intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	403	buf->bo, 0,
				404	read_domain, write_domain);
				405	igt_assert(ret == 0);
				406
				407	ss->ss2.height = igt_buf_height(buf) - 1;
				408	ss->ss2.width = igt_buf_width(buf) - 1;
				409	ss->ss3.pitch = buf->stride - 1;
				410
				411	ss->ss7.shader_chanel_select_r = 4;
				412	ss->ss7.shader_chanel_select_g = 5;
				413	ss->ss7.shader_chanel_select_b = 6;
				414	ss->ss7.shader_chanel_select_a = 7;
				415
				416	return offset;
				417	}
				418
				419	uint32_t
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	420	gen8_fill_interface_descriptor(struct intel_batchbuffer batch, struct igt_buf dst, const uint32_t kernel[][4], size_t size)
				421	{
				422	struct gen8_interface_descriptor_data *idd;
				423	uint32_t offset;
				424	uint32_t binding_table_offset, kernel_offset;
				425
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	426	binding_table_offset = gen7_fill_binding_table(batch, dst);
				427	kernel_offset = gen7_fill_kernel(batch, kernel, size);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	428
Kalamarz, Lukasz	e5e8daf	2018-04-24 10:32:12 +0200	[diff] [blame]	429	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
				430	offset = intel_batchbuffer_subdata_offset(batch, idd);
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	431
				432	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
				433
				434	idd->desc2.single_program_flow = 1;
				435	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
				436
				437	idd->desc3.sampler_count = 0; /* 0 samplers used */
				438	idd->desc3.sampler_state_pointer = 0;
				439
				440	idd->desc4.binding_table_entry_count = 0;
				441	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
				442
				443	idd->desc5.constant_urb_entry_read_offset = 0;
				444	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
				445
Katarzyna Dec	098b05b	2018-04-11 10:15:00 +0200	[diff] [blame]	446	idd->desc6.num_threads_in_tg = 1;
				447
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	448	return offset;
				449	}
				450
				451	void
				452	gen8_emit_state_base_address(struct intel_batchbuffer *batch)
				453	{
				454	OUT_BATCH(GEN8_STATE_BASE_ADDRESS \| (16 - 2));
				455
				456	/* general */
				457	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				458	OUT_BATCH(0);
				459
				460	/* stateless data port */
				461	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				462
				463	/* surface */
				464	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
				465
				466	/* dynamic */
				467	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER \| I915_GEM_DOMAIN_INSTRUCTION,
				468	0, BASE_ADDRESS_MODIFY);
				469
				470	/* indirect */
				471	OUT_BATCH(0);
				472	OUT_BATCH(0);
				473
				474	/* instruction */
				475	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
				476
				477	/* general state buffer size */
				478	OUT_BATCH(0xfffff000 \| 1);
				479	/* dynamic state buffer size */
				480	OUT_BATCH(1 << 12 \| 1);
				481	/* indirect object buffer size */
				482	OUT_BATCH(0xfffff000 \| 1);
				483	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
				484	OUT_BATCH(1 << 12 \| 1);
				485	}
				486
				487	void
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	488	gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
				489	{
				490	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH \| (2 - 2));
				491	OUT_BATCH(0);
				492	}
				493
				494	void
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	495	gen8_emit_vfe_state(struct intel_batchbuffer *batch)
				496	{
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	497	OUT_BATCH(GEN7_MEDIA_VFE_STATE \| (9 - 2));
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	498
				499	/* scratch buffer */
				500	OUT_BATCH(0);
				501	OUT_BATCH(0);
				502
				503	/* number of threads & urb entries */
				504	OUT_BATCH(1 << 16 \|
				505	2 << 8);
				506
				507	OUT_BATCH(0);
				508
				509	/* urb entry size & curbe size */
				510	OUT_BATCH(2 << 16 \|
				511	2);
				512
				513	/* scoreboard */
				514	OUT_BATCH(0);
				515	OUT_BATCH(0);
				516	OUT_BATCH(0);
				517	}
				518
				519	void
				520	gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
				521	{
Katarzyna Dec	8099614	2018-04-11 10:14:59 +0200	[diff] [blame]	522	OUT_BATCH(GEN7_MEDIA_VFE_STATE \| (9 - 2));
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	523
				524	/* scratch buffer */
				525	OUT_BATCH(0);
				526	OUT_BATCH(0);
				527
				528	/* number of threads & urb entries */
				529	OUT_BATCH(1 << 16 \| 1 << 8);
				530
				531	OUT_BATCH(0);
				532
				533	/* urb entry size & curbe size */
				534	OUT_BATCH(0 << 16 \| 1);
				535
				536	/* scoreboard */
				537	OUT_BATCH(0);
				538	OUT_BATCH(0);
				539	OUT_BATCH(0);
				540	}
				541
				542	void
Katarzyna Dec	92e89da	2018-05-04 15:02:14 +0200	[diff] [blame^]	543	gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
				544	{
				545	OUT_BATCH(GEN8_MEDIA_VFE_STATE \| (9 - 2));
				546
				547	/* scratch buffer */
				548	OUT_BATCH(0);
				549	OUT_BATCH(0);
				550
				551	/* number of threads & urb entries */
				552	OUT_BATCH(2 << 8);
				553
				554	OUT_BATCH(0);
				555
				556	/* urb entry size & curbe size */
				557	OUT_BATCH(2 << 16 \|
				558	2);
				559
				560	/* scoreboard */
				561	OUT_BATCH(0);
				562	OUT_BATCH(0);
				563	OUT_BATCH(0);
				564	}
				565
				566	void
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	567	gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
				568	unsigned x, unsigned y,
				569	unsigned width, unsigned height)
				570	{
				571	uint32_t x_dim, y_dim, tmp, right_mask;
				572
				573	/*
				574	* Simply do SIMD16 based dispatch, so every thread uses
				575	* SIMD16 channels.
				576	*
				577	* Define our own thread group size, e.g 16x1 for every group, then
				578	* will have 1 thread each group in SIMD16 dispatch. So thread
				579	* width/height/depth are all 1.
				580	*
				581	* Then thread group X = width / 16 (aligned to 16)
				582	* thread group Y = height;
				583	*/
				584	x_dim = (width + 15) / 16;
				585	y_dim = height;
				586
				587	tmp = width & 15;
				588	if (tmp == 0)
				589	right_mask = (1 << 16) - 1;
				590	else
				591	right_mask = (1 << tmp) - 1;
				592
				593	OUT_BATCH(GEN7_GPGPU_WALKER \| 13);
				594
				595	OUT_BATCH(0); /* kernel offset */
				596	OUT_BATCH(0); /* indirect data length */
				597	OUT_BATCH(0); /* indirect data offset */
				598
				599	/* SIMD size, thread w/h/d */
				600	OUT_BATCH(1 << 30 \| /* SIMD16 */
				601	0 << 16 \| /* depth:1 */
				602	0 << 8 \| /* height:1 */
				603	0); /* width:1 */
				604
				605	/* thread group X */
				606	OUT_BATCH(0);
				607	OUT_BATCH(0);
				608	OUT_BATCH(x_dim);
				609
				610	/* thread group Y */
				611	OUT_BATCH(0);
				612	OUT_BATCH(0);
				613	OUT_BATCH(y_dim);
				614
				615	/* thread group Z */
				616	OUT_BATCH(0);
				617	OUT_BATCH(1);
				618
				619	/* right mask */
				620	OUT_BATCH(right_mask);
				621
				622	/* bottom mask, height 1, always 0xffffffff */
				623	OUT_BATCH(0xffffffff);
				624	}
				625
				626	void
Katarzyna Dec	92e89da	2018-05-04 15:02:14 +0200	[diff] [blame^]	627	gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
				628	{
				629	OUT_BATCH(GEN8_MEDIA_OBJECT \| (8 - 2));
				630
				631	/* interface descriptor offset */
				632	OUT_BATCH(0);
				633
				634	/* without indirect data */
				635	OUT_BATCH(0);
				636	OUT_BATCH(0);
				637
				638	/* scoreboard */
				639	OUT_BATCH(0);
				640	OUT_BATCH(0);
				641
				642	/* inline data (xoffset, yoffset) */
				643	OUT_BATCH(0);
				644	OUT_BATCH(0);
				645	gen8_emit_media_state_flush(batch);
				646	}
				647
				648	void
				649	gen8lp_emit_media_objects_spin(struct intel_batchbuffer *batch)
				650	{
				651	OUT_BATCH(GEN8_MEDIA_OBJECT \| (8 - 2));
				652
				653	/* interface descriptor offset */
				654	OUT_BATCH(0);
				655
				656	/* without indirect data */
				657	OUT_BATCH(0);
				658	OUT_BATCH(0);
				659
				660	/* scoreboard */
				661	OUT_BATCH(0);
				662	OUT_BATCH(0);
				663
				664	/* inline data (xoffset, yoffset) */
				665	OUT_BATCH(0);
				666	OUT_BATCH(0);
				667	}
				668
				669	void
Katarzyna Dec	081f771	2018-04-11 10:14:58 +0200	[diff] [blame]	670	gen9_emit_state_base_address(struct intel_batchbuffer *batch)
				671	{
				672	OUT_BATCH(GEN8_STATE_BASE_ADDRESS \| (19 - 2));
				673
				674	/* general */
				675	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				676	OUT_BATCH(0);
				677
				678	/* stateless data port */
				679	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				680
				681	/* surface */
				682	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
				683
				684	/* dynamic */
				685	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER \| I915_GEM_DOMAIN_INSTRUCTION,
				686	0, BASE_ADDRESS_MODIFY);
				687
				688	/* indirect */
				689	OUT_BATCH(0);
				690	OUT_BATCH(0);
				691
				692	/* instruction */
				693	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
				694
				695	/* general state buffer size */
				696	OUT_BATCH(0xfffff000 \| 1);
				697	/* dynamic state buffer size */
				698	OUT_BATCH(1 << 12 \| 1);
				699	/* indirect object buffer size */
				700	OUT_BATCH(0xfffff000 \| 1);
				701	/* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
				702	OUT_BATCH(1 << 12 \| 1);
				703
				704	/* Bindless surface state base address */
				705	OUT_BATCH(0 \| BASE_ADDRESS_MODIFY);
				706	OUT_BATCH(0);
				707	OUT_BATCH(0xfffff000);
				708	}