/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include <string.h>

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"

#include "ralloc.h"

/***********************************************************************
 * Internal helper for constructing instructions
 */

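/* Infer an execution size from a register's width field.  This relies on
 * the BRW_WIDTH_* and BRW_EXECUTE_* enums sharing the same log2 encoding
 * (1, 2, 4, 8 and 16 map to 0..4), which is what the "definitions are
 * compatible" note below refers to.
 */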
static void guess_execution_size(struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 struct brw_reg reg)
{
   if (reg.width == BRW_WIDTH_8 && p->compressed)
      insn->header.execution_size = BRW_EXECUTE_16;
   else
      insn->header.execution_size = reg.width;   /* note - definitions are compatible */
}


/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
                          struct brw_reg *src,
                          GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen < 6)
      return;

   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
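
/* For reference, a typical call sequence before emitting a SEND looks
 * roughly like this (a sketch; the register numbers are made up):
 *
 *    struct brw_reg src = brw_vec8_grf(2, 0);
 *    gen6_resolve_implied_move(p, &src, 1);
 *
 * On gen6+, src then refers to m1 and the payload has been MOVed there;
 * on older parts the SEND instruction's implied move does the same job.
 */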

static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
   /* From the BSpec / ISA Reference / send - [DevIVB+]:
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}
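
/* Assuming GEN7_MRF_HACK_START is the first GRF reserved for this trick
 * (the R112-R127 range quoted above), the fake MRFs m0..m15 simply map
 * onto r(GEN7_MRF_HACK_START + 0) .. r(GEN7_MRF_HACK_START + 15).
 */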
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         /* Even though it is ignored in da16 mode, it still needs to be
          * set to '01'.
          */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* Even though it is ignored in ia16 mode, it still needs to be
          * set to '01'.
          */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* Set the execution size based on dest.width and the current
    * compression state:
    */
   guess_execution_size(p, insn, dest);
}

extern int reg_type_size[];

static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
   int hstride_for_reg[] = {0, 1, 2, 4};
   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
   int width_for_reg[] = {1, 2, 4, 8, 16};
   int execsize_for_reg[] = {1, 2, 4, 8, 16, 32};
   int width, hstride, vstride, execsize;

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
       * mean the destination has to be 128-bit aligned and the
       * destination horiz stride has to be a word.
       */
      if (reg.type == BRW_REGISTER_TYPE_V) {
         assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
                reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
      }

      return;
   }

   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       reg.nr == BRW_ARF_NULL)
      return;

   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
   hstride = hstride_for_reg[reg.hstride];

   if (reg.vstride == 0xf) {
      vstride = -1;
   } else {
      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
      vstride = vstride_for_reg[reg.vstride];
   }

   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
   width = width_for_reg[reg.width];

   assert(insn->header.execution_size >= 0 &&
          insn->header.execution_size < Elements(execsize_for_reg));
   execsize = execsize_for_reg[insn->header.execution_size];

   /* Restrictions from 3.3.10: Register Region Restrictions. */
   /* 3. */
   assert(execsize >= width);

   /* 4. */
   if (execsize == width && hstride != 0) {
      assert(vstride == -1 || vstride == width * hstride);
   }

   /* 5. */
   if (execsize == width && hstride == 0) {
      /* no restriction on vstride. */
   }

   /* 6. */
   if (width == 1) {
      assert(hstride == 0);
   }

   /* 7. */
   if (execsize == 1 && width == 1) {
      assert(hstride == 0);
      assert(vstride == 0);
   }

   /* 8. */
   if (vstride == 0 && hstride == 0) {
      assert(width == 1);
   }

   /* 10. Check destination issues. */
}
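
/* Worked example: a region like r2.0<8;8,1>:f (vstride 8, width 8,
 * hstride 1) at execution size 8 passes the checks above: execsize (8)
 * >= width (8), and since execsize == width with a non-zero hstride,
 * vstride == width * hstride == 8.
 */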

void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg reg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   if (intel->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
                           insn->header.opcode == BRW_OPCODE_SENDC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   validate_reg(insn, reg);

   insn->bits1.da1.src0_reg_file = reg.file;
   insn->bits1.da1.src0_reg_type = reg.type;
   insn->bits2.da1.src0_abs = reg.abs;
   insn->bits2.da1.src0_negate = reg.negate;
   insn->bits2.da1.src0_address_mode = reg.address_mode;

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;

      /* Required to set some fields in src1 as well:
       */
      insn->bits1.da1.src1_reg_file = 0; /* arf */
      insn->bits1.da1.src1_reg_type = reg.type;
   }
   else
   {
      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.da1.src0_subreg_nr = reg.subnr;
            insn->bits2.da1.src0_reg_nr = reg.nr;
         }
         else {
            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
            insn->bits2.da16.src0_reg_nr = reg.nr;
         }
      }
      else {
         insn->bits2.ia1.src0_subreg_nr = reg.subnr;

         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
         }
         else {
            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
         }
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits2.da1.src0_width = BRW_WIDTH_1;
            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits2.da1.src0_horiz_stride = reg.hstride;
            insn->bits2.da1.src0_width = reg.width;
            insn->bits2.da1.src0_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits2.da16.src0_vert_stride = reg.vstride;
      }
   }
}


void brw_set_src1(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg reg)
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   validate_reg(insn, reg);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits3.da1.src1_subreg_nr = reg.subnr;
         insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
         insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
         insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits3.da1.src1_width = BRW_WIDTH_1;
            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits3.da1.src1_horiz_stride = reg.hstride;
            insn->bits3.da1.src1_width = reg.width;
            insn->bits3.da1.src1_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}

/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}

static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  GLuint function,
                                  GLuint integer_type,
                                  bool low_precision,
                                  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   insn->header.saturate = 0;
}


static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    GLuint response_length,
                                    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}

static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 bool allocate,
                                 bool used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 bool end_of_thread,
                                 bool complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;  /* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;  /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;  /* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;  /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;  /* ? */
      insn->bits3.urb.complete = complete;
   }
}

void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         GLuint binding_table_index,
                         GLuint msg_control,
                         GLuint msg_type,
                         GLuint msg_length,
                         bool header_present,
                         GLuint last_render_target,
                         GLuint response_length,
                         GLuint end_of_thread,
                         GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control |
                                        last_render_target << 6;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control |
                                        last_render_target << 5;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}

void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint msg_control,
                        GLuint msg_type,
                        GLuint target_cache,
                        GLuint msg_length,
                        bool header_present,
                        GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;                 /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;                       /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;               /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;                 /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;                       /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;               /*14:15*/
   }
}

void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint sampler,
                        GLuint msg_type,
                        GLuint response_length,
                        GLuint msg_length,
                        GLuint header_present,
                        GLuint simd_mode,
                        GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}


#define next_insn brw_next_insn
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   if (p->nr_insn + 1 > p->store_size) {
      if (0)
         printf("increasing the store size to %d\n", p->store_size << 1);
      p->store_size <<= 1;
      p->store = reralloc(p->mem_ctx, p->store,
                          struct brw_instruction, p->store_size);
      if (!p->store)
         assert(!"realloc eu store memory failed");
   }

   p->next_insn_offset += 16;
   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}
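
/* Because brw_next_insn() may double and reralloc() the store, any
 * struct brw_instruction pointer taken earlier can be left dangling by a
 * later call.  Code that emits instructions between obtaining and using
 * such a pointer should keep an index into p->store instead (see
 * brw_ENDIF and brw_land_fwd_jump below for the two styles).
 */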

static struct brw_instruction *brw_alu1( struct brw_compile *p,
                                         GLuint opcode,
                                         struct brw_reg dest,
                                         struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static struct brw_instruction *brw_alu2(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static int
get_3src_subreg_nr(struct brw_reg reg)
{
   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
   } else {
      return reg.subnr / 4;
   }
}

static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F);
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   assert(src0.type == BRW_REGISTER_TYPE_F);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   assert(src1.type == BRW_REGISTER_TYPE_F);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   assert(src2.type == BRW_REGISTER_TYPE_F);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   return insn;
}


/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0)           \
{                                                               \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);             \
}

#define ALU2(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1)           \
{                                                               \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
}

#define ALU3(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                               \
void brw_##OP(struct brw_compile *p,                            \
              struct brw_reg dest,                              \
              struct brw_reg src)                               \
{                                                               \
   struct brw_instruction *rnd, *add;                           \
   rnd = next_insn(p, BRW_OPCODE_##OP);                         \
   brw_set_dest(p, rnd, dest);                                  \
   brw_set_src0(p, rnd, src);                                   \
                                                                \
   if (p->brw->intel.gen < 6) {                                 \
      /* turn on round-increments */                            \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;  \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));            \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;     \
   }                                                            \
}
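
/* So brw_RNDZ(p, dest, src), for instance, expands to a single RNDZ on
 * gen6 and later, and to RNDZ with round-increments enabled plus a
 * predicated ADD of 1.0 on older generations.
 */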


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)

ROUND(RNDZ)
ROUND(RNDE)


struct brw_instruction *brw_ADD(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

struct brw_instruction *brw_AVG(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      assert(!"Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

struct brw_instruction *brw_MUL(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}


void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_ud(0x0));
}


/***********************************************************************
 * Comparisons, if/else/endif
 */

struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
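
/* A typical forward jump records the JMPI's index and patches it once
 * the landing point is known (a sketch):
 *
 *    int jmp = p->nr_insn;
 *    brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
 *    ... emit the instructions to be skipped ...
 *    brw_land_fwd_jump(p, jmp);
 */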

static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static struct brw_instruction *
pop_if_stack(struct brw_compile *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   /* Both loop_stack[loop_stack_depth] and if_depth_in_loop[loop_stack_depth
    * + 1] are written below, so grow the arrays while either index could
    * land outside them.
    */
   if (p->loop_stack_array_size <= p->loop_stack_depth + 1) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
                                     p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
}

static struct brw_instruction *
get_inner_do_insn(struct brw_compile *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* The EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
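
/* A typical (non-SPF) if/else block emitted by the compiler backends
 * looks roughly like this sketch:
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, x, y);
 *    brw_IF(p, BRW_EXECUTE_8);
 *       ... "then" instructions ...
 *    brw_ELSE(p);
 *       ... "else" instructions ...
 *    brw_ENDIF(p);
 *
 * brw_CMP() with a null destination leaves predication enabled (see
 * brw_CMP below), and brw_ENDIF() patches the jump targets afterwards.
 */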

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (intel->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br * (endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br * (endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}

void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of the instruction
    * store (p->store), so call it first, before any pointers into the
    * store are computed from an index.
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}

struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}

struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}

struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
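
/* Loops follow the same emission pattern (a sketch):
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *       ... body, possibly containing brw_BREAK(p)/brw_CONT(p) ...
 *    brw_WHILE(p);
 *
 * On pre-gen6, brw_WHILE() fixes up any BREAK/CONT jump counts via
 * brw_patch_break_cont() below.
 */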

/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   int br = (intel->gen == 5) ? 2 : 1;

   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}

struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}


/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
1636
1637
1638
1639/* To integrate with the above, it makes sense that the comparison
1640 * instruction should populate the flag register. It might be simpler
1641 * just to use the flag reg for most WM tasks?
1642 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/* guess_execution_size(p, insn, src0); */

   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}
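
/* Example (hypothetical registers): writing the CMP result to the null
 * register enables predication for what follows, per the code above.
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, src_a, src_b);
 *    brw_MOV(p, dst, src_a);    (executes only where src_a >= src_b)
 */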

/* Issue a 'wait' instruction on notification register n1; the host can
 * program MMIO to wake the thread back up.
 */
void brw_WAIT(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math(struct brw_compile *p,
              struct brw_reg dest,
              GLuint function,
              GLuint msg_reg_nr,
              struct brw_reg src,
              GLuint data_type,
              GLuint precision)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}
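
/* Usage sketch (hypothetical registers): a full-precision reciprocal over
 * one GRF of floats.  On gen6+ this becomes a native MATH instruction;
 * earlier parts go through the extended-math SEND path, with msg_reg_nr
 * selecting the implied message register.
 *
 *    brw_math(p, dst, BRW_MATH_FUNCTION_INV, 2, src,
 *             BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
 */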

/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
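
/* Usage sketch (hypothetical registers): two-source math is gen6+ only,
 * e.g. a POW computed directly with no message registers involved.
 *
 *    brw_math2(p, dst, BRW_MATH_FUNCTION_POW, base, exponent);
 */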


/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
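
/* Spill sketch (hypothetical MRF and offset): save two GRFs to the start
 * of the scratch space.  The byte offset must be 16-byte aligned, and
 * num_regs of 1 or 2 selects the 2- or 4-oword block message.
 *
 *    brw_oword_block_write_scratch(p, brw_message_reg(1), 2, 0);
 */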


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest); /* UW? */
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}
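
/* Matching unspill sketch (hypothetical registers and offset): read the
 * same two GRFs back from scratch into g4/g5.
 *
 *    brw_oword_block_read_scratch(p, brw_vec8_grf(4, 0),
 *                                 brw_message_reg(1), 2, 0);
 */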

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
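
/* Constant-fetch sketch (hypothetical GRF, offset, and binding-table
 * slot): pull one float[4] from a const buffer at byte offset 32.
 *
 *    brw_oword_block_read(p, brw_vec4_grf(6, 0), brw_message_reg(1),
 *                         32, bind_table_index);
 */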


void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
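
/* Render-target-write sketch (all counts hypothetical): a SIMD16 color
 * write with EOT from an 8-register payload starting at m2, targeting
 * binding-table entry 0, with no header.
 *
 *    brw_fb_WRITE(p, 16, 2, brw_message_reg(2),
 *                 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
 *                 0, 8, 0, true, false);
 */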


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint writemask,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLuint header_present,
                GLuint simd_mode,
                GLuint return_format)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = false;

   if (writemask == 0) {
      /* printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != BRW_WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      for (i = 0; i < 4; i++) {
         if (writemask & (1<<i))
            break;
         dst_offset += 2;
      }
      for (; i < 4; i++) {
         if (!(writemask & (1<<i)))
            break;
         newmask |= 1<<i;
         len++;
      }

      if (newmask != writemask) {
         need_stall = true;
         /* printf("need stall %x %x\n", newmask, writemask); */
      }
      else {
         bool dispatch_16 = false;

         struct brw_reg m1 = brw_message_reg(msg_reg_nr);

         guess_execution_size(p, p->current, dest);
         if (p->current->header.execution_size == BRW_EXECUTE_16)
            dispatch_16 = true;

         newmask = ~newmask & BRW_WRITEMASK_XYZW;

         brw_push_insn_state(p);

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_set_mask_control(p, BRW_MASK_DISABLE);

         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

         brw_pop_insn_state(p);

         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
         dest = offset(dest, dst_offset);

         /* For 16-wide dispatch, masked channels are skipped in the
          * response.  For 8-wide, masked channels still take up slots,
          * and are just not written to.
          */
         if (dispatch_16)
            response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
         insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
                              binding_table_index,
                              sampler,
                              msg_type,
                              response_length,
                              msg_length,
                              header_present,
                              simd_mode,
                              return_format);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
              retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
}
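
/* Sampler sketch (hypothetical surface, sampler, and payload numbers): a
 * SIMD8 4-channel sample into four destination registers, from surface 0
 * with sampler 0, using a 3-register payload with a header.
 *
 *    brw_SAMPLE(p, retype(brw_vec8_grf(14, 0), BRW_REGISTER_TYPE_UW),
 *               2, brw_message_reg(2), 0, 0, BRW_WRITEMASK_XYZW,
 *               BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, 4, 3, 1,
 *               BRW_SAMPLER_SIMD_MODE_SIMD8,
 *               BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
 */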

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   bool allocate,
                   bool used,
                   GLuint msg_length,
                   GLuint response_length,
                   bool eot,
                   bool writes_complete,
                   GLuint offset,
                   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       allocate,
                       used,
                       msg_length,
                       response_length,
                       eot,
                       writes_complete,
                       offset,
                       swizzle);
}
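
/* URB write sketch (hypothetical counts): emit four registers of vertex
 * data starting at m0 and terminate the thread.
 *
 *    brw_urb_WRITE(p, brw_null_reg(), 0, src, false, true,
 *                  4, 0, true, true, 0, BRW_URB_SWIZZLE_NONE);
 */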
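
/* Step over a single instruction, accounting for compaction: compacted
 * instructions occupy 8 bytes in the store, native ones 16.
 */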
static int
next_ip(struct brw_compile *p, int ip)
{
   struct brw_instruction *insn = (void *)p->store + ip;

   if (insn->header.cmpt_control)
      return ip + 8;
   else
      return ip + 16;
}

static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
   int ip;
   void *store = p->store;

   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      switch (insn->header.opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
      case BRW_OPCODE_HALT:
         return ip;
      }
   }

   return 0;
}

/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int scale = 8;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
         int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
                                   : insn->bits3.break_cont.jip;
         if (ip + jip * scale <= start)
            return ip;
      }
   }
   assert(!"not reached");
   return start;
}
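
/* Worked example (made-up offsets): a gen7 WHILE at byte offset 96 with
 * jip == -9 jumps back to 96 + (-9 * 8) == 24, so any loop-start at or
 * after 24 satisfies ip + jip * scale <= start and ends the search.
 */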

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int scale = 8;
   void *store = p->store;

   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
         assert(insn->header.opcode != BRW_OPCODE_BREAK &&
                insn->header.opcode != BRW_OPCODE_CONTINUE &&
                insn->header.opcode != BRW_OPCODE_HALT);
         continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip +
             (intel->gen == 6 ? 16 : 0)) / scale;
         break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_ip != 0);
         insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, ip) - ip) / scale;

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;

      case BRW_OPCODE_ENDIF:
         if (block_end_ip == 0)
            insn->bits3.break_cont.jip = 2;
         else
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         break;

      case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same.  In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         if (block_end_ip == 0) {
            insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
         } else {
            insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
         }
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}
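
/* Worked example (made-up offsets): a BREAK at byte offset 48 whose
 * enclosing block ends at 112 gets jip = (112 - 48) / 8 = 8.  If the
 * loop's WHILE sits at 160, gen7 sets uip = (160 - 48) / 8 = 14, while
 * gen6 points just past the WHILE: (160 + 16 - 48) / 8 = 16.
 */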

void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 GLuint response_length,
                 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
                           insn,
                           allocate,
                           response_length,
                           eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *    "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *     writes are complete by sending the final write as a committed write."
 */
void
brw_svb_write(struct brw_compile *p,
              struct brw_reg dest,
              GLuint msg_reg_nr,
              struct brw_reg src0,
              GLuint binding_table_index,
              bool send_commit_msg)
{
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));
   brw_set_dp_write_message(p, insn,
                            binding_table_index,
                            0, /* msg_control: ignored */
                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
                            1, /* msg_length */
                            true, /* header_present */
                            0, /* last_render_target: ignored */
                            send_commit_msg, /* response_length */
                            0, /* end_of_thread */
                            send_commit_msg); /* send_commit_msg */
}
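
/* Stream-output sketch (hypothetical destination, MRF, and surface
 * index): write one vertex's worth of data, flagging the final write of
 * the thread as committed.
 *
 *    brw_svb_write(p, dest, 1, brw_message_reg(1), surf_index,
 *                  is_final_write);
 */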

/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword
 * for all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because we want to just
 * write one u32.  So we use the same untyped atomic write message as the
 * pixel shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         int base_mrf,
                         uint32_t surf_index)
{
   struct intel_context *intel = &p->brw->intel;
   assert(intel->gen >= 7);

   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                      base_mrf, 0));

   bool header_present = false;
   bool eot = false;
   uint32_t mlen = 2; /* offset, value */
   uint32_t rlen = 0;
   brw_set_message_descriptor(p, send,
                              GEN7_SFID_DATAPORT_DATA_CACHE,
                              mlen, rlen, header_present, eot);

   send->bits3.ud |= 6 << 14; /* untyped atomic op */
   send->bits3.ud |= 0 << 13; /* no return data */
   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
   send->bits3.ud |= BRW_AOP_ADD << 8;
   send->bits3.ud |= surf_index << 0;
2549}