blob: df72b98906666be19f98d433015fe6ae584c2623 [file] [log] [blame]
Eric Anholt11dd9e92011-05-24 16:34:27 -07001/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
Kenneth Graunkeec44d562013-09-17 23:32:10 -070024/** @file brw_fs_generator.cpp
Eric Anholt11dd9e92011-05-24 16:34:27 -070025 *
Kenneth Graunkeec44d562013-09-17 23:32:10 -070026 * This file supports generating code from the FS LIR to the actual
Eric Anholt11dd9e92011-05-24 16:34:27 -070027 * native instructions.
28 */
29
30extern "C" {
31#include "main/macros.h"
32#include "brw_context.h"
33#include "brw_eu.h"
34} /* extern "C" */
35
36#include "brw_fs.h"
Eric Anholt5ed57d92012-10-03 13:03:12 -070037#include "brw_cfg.h"
Eric Anholt11dd9e92011-05-24 16:34:27 -070038
/**
 * Construct the FS code generator.
 *
 * Captures the compile state (@p c), the GLSL program (@p prog, may be NULL
 * for non-GLSL programs), the gl_fragment_program (@p fp), and whether this
 * shader writes dual-source blend outputs, then allocates a fresh
 * brw_compile instruction store out of the compile's ralloc context.
 */
fs_generator::fs_generator(struct brw_context *brw,
                           struct brw_wm_compile *c,
                           struct gl_shader_program *prog,
                           struct gl_fragment_program *fp,
                           bool dual_source_output)

   : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
{
   ctx = &brw->ctx;

   /* Only GLSL programs have a linked fragment shader to point at. */
   shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;

   /* Allocations made during generation live as long as the compile. */
   mem_ctx = c;

   p = rzalloc(mem_ctx, struct brw_compile);
   brw_init_compile(brw, p, mem_ctx);
}
56
/* Nothing to release explicitly: everything allocated during generation is
 * owned by mem_ctx and freed with it (ralloc).
 */
fs_generator::~fs_generator()
{
}
60
Eric Anholt11dd9e92011-05-24 16:34:27 -070061void
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070062fs_generator::mark_surface_used(unsigned surf_index)
63{
Eric Anholt4e530642013-10-02 18:53:04 -070064 assert(surf_index < BRW_MAX_SURFACES);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070065
Eric Anholt3c9dc2d2013-10-02 14:07:40 -070066 c->prog_data.base.binding_table.size_bytes =
67 MAX2(c->prog_data.base.binding_table.size_bytes, (surf_index + 1) * 4);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070068}
69
/**
 * After code emission, retarget every recorded discard HALT so its UIP
 * points at a final HALT emitted at the end of the program.
 *
 * No-op before gen6 (no HALT-based discards) or when no discard jumps
 * were recorded.
 */
void
fs_generator::patch_discard_jumps_to_fb_writes()
{
   if (brw->gen < 6 || this->discard_halt_patches.is_empty())
      return;

   /* There is a somewhat strange undocumented requirement of using
    * HALT, according to the simulator.  If some channel has HALTed to
    * a particular UIP, then by the end of the program, every channel
    * must have HALTed to that UIP.  Furthermore, the tracking is a
    * stack, so you can't do the final halt of a UIP after starting
    * halting to a new UIP.
    *
    * Symptoms of not emitting this instruction on actual hardware
    * included GPU hangs and sparkly rendering on the piglit discard
    * tests.
    */
   struct brw_instruction *last_halt = gen6_HALT(p);
   last_halt->bits3.break_cont.uip = 2;
   last_halt->bits3.break_cont.jip = 2;

   int ip = p->nr_insn;

   /* Walk the recorded HALTs and point each one's UIP at the final HALT. */
   foreach_list(node, &this->discard_halt_patches) {
      ip_record *patch_ip = (ip_record *)node;
      struct brw_instruction *patch = &p->store[patch_ip->ip];

      assert(patch->header.opcode == BRW_OPCODE_HALT);
      /* HALT takes a half-instruction distance from the pre-incremented IP. */
      patch->bits3.break_cont.uip = (ip - patch_ip->ip) * 2;
   }

   this->discard_halt_patches.make_empty();
}
104
/**
 * Emit the framebuffer write (render target write) message for @p inst.
 *
 * Sets up the optional two-register message header (g0 implied, g1 or
 * explicit fields for render target index / source0-alpha replication),
 * selects the SIMD8/SIMD16/dual-source message control, and emits the
 * dataport write, marking the render target's binding table slot as used.
 */
void
fs_generator::generate_fb_write(fs_inst *inst)
{
   bool eot = inst->eot;
   struct brw_reg implied_header;
   uint32_t msg_control;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (fp->UsesKill) {
      struct brw_reg pixel_mask;

      /* The live-pixel mask lives in different payload registers depending
       * on generation.
       */
      if (brw->gen >= 6)
         pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      else
         pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
   }

   if (inst->header_present) {
      if (brw->gen >= 6) {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0 && c->key.replicate_alpha) {
            /* Set "Source0 Alpha Present to RenderTarget" bit in message
             * header.
             */
            brw_OR(p,
                   vec1(retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD)),
                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   brw_imm_ud(0x1 << 11));
         }

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                           inst->base_mrf, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         /* Pre-gen6: the implied move provides g0; copy g1 explicitly. */
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   if (this->dual_source_output)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
   else if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_pop_insn_state(p);

   uint32_t surf_index =
      c->prog_data.binding_table.render_target_start + inst->target;
   brw_fb_WRITE(p,
                dispatch_width,
                inst->base_mrf,
                implied_header,
                msg_control,
                surf_index,
                inst->mlen,
                0,
                eot,
                inst->header_present);

   mark_surface_used(surf_index);
}
192
193/* Computes the integer pixel x,y values from the origin.
194 *
195 * This is the basis of gl_FragCoord computation, but is also used
196 * pre-gen6 for computing the deltas from v0 for computing
197 * interpolation.
198 */
/**
 * Emit the integer pixel X or Y computation into @p dst.
 *
 * Adds per-pixel deltas (packed immediate vector) to the subspan origin
 * coordinates held in g1 of the thread payload.  @p is_x selects the X
 * coordinate (suboffset 4, deltas 0,1,0,1,...) vs Y (suboffset 5, deltas
 * 0,0,1,1,...).
 */
void
fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg src;
   struct brw_reg deltas;

   if (is_x) {
      src = stride(suboffset(g1_uw, 4), 2, 4, 0);
      deltas = brw_imm_v(0x10101010);
   } else {
      src = stride(suboffset(g1_uw, 5), 2, 4, 0);
      deltas = brw_imm_v(0x11001100);
   }

   if (dispatch_width == 16) {
      dst = vec16(dst);
   }

   /* We do this 8 or 16-wide, but since the destination is UW we
    * don't do compression in the 16-wide case.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_ADD(p, dst, src, deltas);
   brw_pop_insn_state(p);
}
226
/**
 * Emit linear interpolation of a varying: dst = interp(delta_x, delta_y).
 *
 * Uses the single PLN instruction when the hardware has it and the delta
 * registers satisfy its operand constraints (deltas in consecutive
 * registers; pre-gen6 additionally requires an even-aligned register pair).
 * Otherwise falls back to the two-instruction LINE + MAC sequence.
 */
void
fs_generator::generate_linterp(fs_inst *inst,
                               struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (brw->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
244
/**
 * Emit a one-operand math instruction on gen7.
 *
 * Gen7 math executes directly on the EU rather than as a message, so no
 * MRF payload is used (mlen must be 0) and the base MRF argument is 0.
 */
void
fs_generator::generate_math1_gen7(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0)
{
   assert(inst->mlen == 0);
   brw_math(p, dst,
            brw_math_function(inst->opcode),
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
257
/**
 * Emit a two-operand math instruction on gen7.
 *
 * As with the one-operand case, gen7 math takes its operands directly
 * (mlen must be 0); no MRF setup is needed.
 */
void
fs_generator::generate_math2_gen7(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   assert(inst->mlen == 0);
   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
}
267
/**
 * Emit a one-operand math instruction on gen6.
 *
 * The math instruction is emitted uncompressed; in SIMD16 dispatch a second
 * instruction is emitted for the upper half (sechalf) of the registers, and
 * compressed mode is restored afterwards.
 */
void
fs_generator::generate_math1_gen6(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0)
{
   int op = brw_math_function(inst->opcode);

   /* Gen6 math is operand-based, not message-based. */
   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
294
/**
 * Emit a two-operand math instruction on gen6.
 *
 * Same uncompressed-plus-second-half pattern as generate_math1_gen6(),
 * applied to both source operands.
 */
void
fs_generator::generate_math2_gen6(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math2(p, dst, op, src0, src1);

   if (dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
314
/**
 * Emit a math instruction on gen4/gen5.
 *
 * Here math is a message to the shared math unit, so the operand comes
 * from the MRF (mlen >= 1).  In SIMD16 dispatch a second message is sent
 * from the following MRF for the upper register half.
 */
void
fs_generator::generate_math_gen4(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
342
343void
Kenneth Graunke1b77d212013-03-30 00:15:54 -0700344fs_generator::generate_math_g45(fs_inst *inst,
345 struct brw_reg dst,
346 struct brw_reg src)
347{
348 if (inst->opcode == SHADER_OPCODE_POW ||
349 inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
350 inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
351 generate_math_gen4(inst, dst, src);
352 return;
353 }
354
355 int op = brw_math_function(inst->opcode);
356
357 assert(inst->mlen >= 1);
358
359 brw_math(p, dst,
360 op,
361 inst->base_mrf, src,
362 BRW_MATH_DATA_VECTOR,
363 BRW_MATH_PRECISION_FULL);
364}
365
/**
 * Emit a sampler (texture) message for @p inst.
 *
 * Selects the message type from the opcode and hardware generation, the
 * SIMD mode and response length from the dispatch width, and the return
 * format from the destination register type; sets up the optional message
 * header (including texture offsets in DWord 2); then emits the SAMPLE
 * message and marks the texture's binding table slot as used.
 */
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   uint32_t return_format;

   /* The sampler can return integer data directly; pick the return format
    * from the destination type.
    */
   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   /* Forced-uncompressed / second-half instructions act on 8 channels even
    * in SIMD16 dispatch.
    */
   if (dispatch_width == 16 &&
       !inst->force_uncompressed && !inst->force_sechalf)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_LOD:
         msg_type = GEN5_SAMPLER_MESSAGE_LOD;
         break;
      case SHADER_OPCODE_TG4:
         assert(brw->gen >= 6);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         break;
      default:
         assert(!"not reached");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         assert(dispatch_width == 8);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
         } else {
            assert(inst->mlen <= 4);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case SHADER_OPCODE_TXF:
         assert(inst->mlen == 9);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case SHADER_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         assert(!"not reached");
         break;
      }
   }
   assert(msg_type != -1);

   /* SIMD16 messages return twice as much data. */
   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
      /* The send-from-GRF for 16-wide texturing with a header has an extra
       * hardware register allocated to it, which we need to skip over (since
       * our coordinates in the payload are in the even-numbered registers,
       * and the header comes right before the first one).
       */
      assert(src.file == BRW_GENERAL_REGISTER_FILE);
      src.nr++;
   }

   /* Load the message header if present.  If there's a texture offset,
    * we need to set it up explicitly and load the offset bitfield.
    * Otherwise, we can use an implied move from g0 to the first message reg.
    */
   if (inst->texture_offset) {
      struct brw_reg header_reg;

      /* On gen7+ the header lives in the GRF payload; earlier generations
       * use the MRF.
       */
      if (brw->gen >= 7) {
         header_reg = src;
      } else {
         assert(inst->base_mrf != -1);
         header_reg = brw_message_reg(inst->base_mrf);
      }
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p, header_reg, brw_vec8_grf(0, 0));

      /* Then set the offset bits in DWord 2. */
      brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
                                     header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(inst->texture_offset));
      brw_pop_insn_state(p);
   } else if (inst->header_present) {
      if (brw->gen >= 7) {
         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_mask_control(p, BRW_MASK_DISABLE);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_MOV(p, src, brw_vec8_grf(0, 0));
         brw_pop_insn_state(p);
      } else {
         /* Set up an implied move from g0 to the MRF. */
         src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }
   }

   /* Gather (TG4) uses a separate binding table section. */
   uint32_t surface_index = (inst->opcode == SHADER_OPCODE_TG4
      ? c->prog_data.base.binding_table.gather_texture_start
      : c->prog_data.base.binding_table.texture_start) + inst->sampler;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surface_index,
              inst->sampler,
              msg_type,
              rlen,
              inst->mlen,
              inst->header_present,
              simd_mode,
              return_format);

   mark_surface_used(surface_index);
}
573
574
575/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
576 * looking like:
577 *
578 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
579 *
Chia-I Wu848c0e72013-09-12 13:00:52 +0800580 * Ideally, we want to produce:
Eric Anholt11dd9e92011-05-24 16:34:27 -0700581 *
582 * DDX DDY
583 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
584 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
585 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
586 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
587 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
588 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
589 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
590 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
591 *
592 * and add another set of two more subspans if in 16-pixel dispatch mode.
593 *
594 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
595 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
Chia-I Wu848c0e72013-09-12 13:00:52 +0800596 * pair. But the ideal approximation may impose a huge performance cost on
597 * sample_d. On at least Haswell, sample_d instruction does some
598 * optimizations if the same LOD is used for all pixels in the subspan.
599 *
Paul Berry800610f2013-09-20 09:04:31 -0700600 * For DDY, we need to use ALIGN16 mode since it's capable of doing the
601 * appropriate swizzling.
Eric Anholt11dd9e92011-05-24 16:34:27 -0700602 */
603void
Kenneth Graunkeea681a02012-11-09 01:05:47 -0800604fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
Eric Anholt11dd9e92011-05-24 16:34:27 -0700605{
Chia-I Wu848c0e72013-09-12 13:00:52 +0800606 unsigned vstride, width;
607
608 if (c->key.high_quality_derivatives) {
609 /* produce accurate derivatives */
610 vstride = BRW_VERTICAL_STRIDE_2;
611 width = BRW_WIDTH_2;
612 }
613 else {
614 /* replicate the derivative at the top-left pixel to other pixels */
615 vstride = BRW_VERTICAL_STRIDE_4;
616 width = BRW_WIDTH_4;
617 }
618
Eric Anholt11dd9e92011-05-24 16:34:27 -0700619 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
620 BRW_REGISTER_TYPE_F,
Chia-I Wu848c0e72013-09-12 13:00:52 +0800621 vstride,
622 width,
Eric Anholt11dd9e92011-05-24 16:34:27 -0700623 BRW_HORIZONTAL_STRIDE_0,
624 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
625 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
626 BRW_REGISTER_TYPE_F,
Chia-I Wu848c0e72013-09-12 13:00:52 +0800627 vstride,
628 width,
Eric Anholt11dd9e92011-05-24 16:34:27 -0700629 BRW_HORIZONTAL_STRIDE_0,
630 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
631 brw_ADD(p, dst, src0, negate(src1));
632}
633
Paul Berry82d25962012-06-20 13:40:45 -0700634/* The negate_value boolean is used to negate the derivative computation for
635 * FBOs, since they place the origin at the upper left instead of the lower
636 * left.
637 */
/**
 * Emit the DDY derivative.
 *
 * @p negate_value flips the sign for FBO rendering (origin at the upper
 * left instead of the lower left).  The high-quality path needs Align16
 * swizzles and, on gen4 and non-Haswell gen7, a manual unroll of SIMD16
 * into two SIMD8 instructions (see the PRM citations below).
 */
void
fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                           bool negate_value)
{
   if (c->key.high_quality_derivatives) {
      /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
       * Region Restrictions):
       *
       *     In Align16 access mode, SIMD16 is not allowed for DW operations
       *     and SIMD8 is not allowed for DF operations.
       *
       * In this context, "DW operations" means "operations acting on 32-bit
       * values", so it includes operations on floats.
       *
       * Gen4 has a similar restriction.  From the i965 PRM, section 11.5.3
       * (Instruction Compression -> Rules and Restrictions):
       *
       *     A compressed instruction must be in Align1 access mode.  Align16
       *     mode instructions cannot be compressed.
       *
       * Similar text exists in the g45 PRM.
       *
       * On these platforms, if we're building a SIMD16 shader, we need to
       * manually unroll to a pair of SIMD8 instructions.
       */
      bool unroll_to_simd8 =
         (dispatch_width == 16 &&
          (brw->gen == 4 || (brw->gen == 7 && !brw->is_haswell)));

      /* produce accurate derivatives */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_16);
      if (unroll_to_simd8)
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      if (negate_value)
         brw_ADD(p, dst, src1, negate(src0));
      else
         brw_ADD(p, dst, src0, negate(src1));
      if (unroll_to_simd8) {
         /* Second SIMD8 instruction for the upper register halves. */
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         src0 = sechalf(src0);
         src1 = sechalf(src1);
         dst = sechalf(dst);
         if (negate_value)
            brw_ADD(p, dst, src1, negate(src0));
         else
            brw_ADD(p, dst, src0, negate(src1));
      }
      brw_pop_insn_state(p);
   } else {
      /* replicate the derivative at the top-left pixel to other pixels */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      if (negate_value)
         brw_ADD(p, dst, src1, negate(src0));
      else
         brw_ADD(p, dst, src0, negate(src1));
   }
}
719
/**
 * Emit the HALT for a discard, recording its IP so
 * patch_discard_jumps_to_fb_writes() can later point its UIP at the end of
 * the program.  Gen6+ only.
 */
void
fs_generator::generate_discard_jump(fs_inst *inst)
{
   assert(brw->gen >= 6);

   /* This HALT will be patched up at FB write time to point UIP at the end of
    * the program, and at brw_uip_jip() JIP will be set to the end of the
    * current block (or the program).
    */
   this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   gen6_HALT(p);
   brw_pop_insn_state(p);
}
736
/**
 * Emit a register spill: copy @p src into the message payload and write one
 * oword block to the scratch buffer at inst->offset.
 */
void
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
           retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
                                 inst->offset);
}
748
/**
 * Emit a register unspill: read one oword block from the scratch buffer at
 * inst->offset into @p dst.
 */
void
fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
                                inst->offset);
}
757
/**
 * Emit an oword-block read of uniform (pull) constants.
 *
 * Both the surface index and the read offset must be immediate UD values;
 * the constants are read from that binding-table surface into @p dst, and
 * the surface is marked used.
 */
void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(inst->mlen != 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_IMMEDIATE_VALUE &&
          offset.type == BRW_REGISTER_TYPE_UD);
   uint32_t read_offset = offset.dw1.ud;

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        read_offset, surf_index);

   mark_surface_used(surf_index);
}
779
/**
 * Gen7+ uniform pull constant load, implemented as a header-less SIMD4x2
 * sampler LD message instead of an oword block read.
 */
void
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg offset)
{
   /* No MRF payload: the offset register is sent directly. */
   assert(inst->mlen == 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
	  index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_GENERAL_REGISTER_FILE);
   /* Reference just the dword we need, to avoid angering validate_reg(). */
   offset = brw_vec1_grf(offset.nr, 0);

   /* Emit the SEND uncompressed and unmasked; only the single offset dword
    * in src0 matters.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use the SIMD4x2 mode because we want to end up with 4 components in
    * the destination loaded consecutively from the same offset (which appears
    * in the first component, and the rest are ignored).
    */
   dst.width = BRW_WIDTH_4;
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, offset);
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* LD message ignores sampler unit */
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           1, /* rlen */
                           1, /* mlen */
                           false, /* no header */
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                           0);

   mark_surface_used(surf_index);
}
821
/**
 * Pre-gen7 varying-offset pull constant load, implemented as a sampler LD
 * message with a header and the per-channel offsets passed in an MRF.
 */
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(brw->gen < 7); /* Should use the gen7 variant. */
   assert(inst->header_present);
   assert(inst->mlen);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
	  index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   /* Response length and SIMD mode follow the dispatch width. */
   uint32_t simd_mode, rlen, msg_type;
   if (dispatch_width == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      rlen = 8;
   } else {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      rlen = 4;
   }

   if (brw->gen >= 5)
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
   else {
      /* We always use the SIMD16 message so that we only have to load U, and
       * not V or R.
       */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
      assert(inst->mlen == 3);
      assert(inst->regs_written == 8);
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   }

   /* Put the per-channel offsets in the MRF slot after the header. */
   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
                                      BRW_REGISTER_TYPE_D);
   brw_MOV(p, offset_mrf, offset);

   /* The message header starts from g0; on gen6+ this may emit an implied
    * MOV into the MRF.
    */
   struct brw_reg header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   send->header.compression_control = BRW_COMPRESSION_NONE;
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;

   /* Our surface is set up as floats, regardless of what actual data is
    * stored in it.
    */
   uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* sampler (unused) */
                           msg_type,
                           rlen,
                           inst->mlen,
                           inst->header_present,
                           simd_mode,
                           return_format);

   mark_surface_used(surf_index);
}
888
/**
 * Gen7+ varying-offset pull constant load: a header-less sampler LD message
 * whose offsets come straight from a GRF.
 */
void
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg offset)
{
   assert(brw->gen >= 7);
   /* Varying-offset pull constant loads are treated as a normal expression on
    * gen7, so the fact that it's a send message is hidden at the IR level.
    */
   assert(!inst->header_present);
   assert(!inst->mlen);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
	  index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   /* Message and response lengths scale with the dispatch width. */
   uint32_t simd_mode, rlen, mlen;
   if (dispatch_width == 16) {
      mlen = 2;
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mlen = 1;
      rlen = 4;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, offset);
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* LD message ignores sampler unit */
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           rlen,
                           mlen,
                           false, /* no header */
                           simd_mode,
                           0);

   mark_surface_used(surf_index);
}
Paul Berry3f929ef2012-06-18 14:50:04 -0700932
/**
 * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
 * into the flags register (f0.0).
 *
 * Used only on Gen6 and above.
 */
void
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
{
   struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
   struct brw_reg dispatch_mask;

   /* NOTE(review): despite the "Gen6 and above" note in the header comment, a
    * pre-gen6 source (g0.0) is handled here as well.
    */
   if (brw->gen >= 6)
      dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
   else
      dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

   /* Write the flag register unconditionally, regardless of channel enables. */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, flags, dispatch_mask);
   brw_pop_insn_state(p);
}
955
956
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800957static uint32_t brw_file_from_reg(fs_reg *reg)
958{
959 switch (reg->file) {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800960 case GRF:
961 return BRW_GENERAL_REGISTER_FILE;
962 case MRF:
963 return BRW_MESSAGE_REGISTER_FILE;
964 case IMM:
965 return BRW_IMMEDIATE_VALUE;
966 default:
967 assert(!"not reached");
968 return BRW_GENERAL_REGISTER_FILE;
969 }
970}
971
Eric Anholt11dd9e92011-05-24 16:34:27 -0700972static struct brw_reg
973brw_reg_from_fs_reg(fs_reg *reg)
974{
975 struct brw_reg brw_reg;
976
977 switch (reg->file) {
978 case GRF:
Eric Anholt11dd9e92011-05-24 16:34:27 -0700979 case MRF:
980 if (reg->smear == -1) {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800981 brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
Eric Anholt11dd9e92011-05-24 16:34:27 -0700982 } else {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800983 brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
Eric Anholt11dd9e92011-05-24 16:34:27 -0700984 }
985 brw_reg = retype(brw_reg, reg->type);
986 if (reg->sechalf)
987 brw_reg = sechalf(brw_reg);
988 break;
989 case IMM:
990 switch (reg->type) {
991 case BRW_REGISTER_TYPE_F:
992 brw_reg = brw_imm_f(reg->imm.f);
993 break;
994 case BRW_REGISTER_TYPE_D:
995 brw_reg = brw_imm_d(reg->imm.i);
996 break;
997 case BRW_REGISTER_TYPE_UD:
998 brw_reg = brw_imm_ud(reg->imm.u);
999 break;
1000 default:
1001 assert(!"not reached");
1002 brw_reg = brw_null_reg();
1003 break;
1004 }
1005 break;
Eric Anholtab04f3b2013-04-29 16:05:05 -07001006 case HW_REG:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001007 brw_reg = reg->fixed_hw_reg;
1008 break;
1009 case BAD_FILE:
1010 /* Probably unused. */
1011 brw_reg = brw_null_reg();
1012 break;
1013 case UNIFORM:
1014 assert(!"not reached");
1015 brw_reg = brw_null_reg();
1016 break;
1017 default:
1018 assert(!"not reached");
1019 brw_reg = brw_null_reg();
1020 break;
1021 }
1022 if (reg->abs)
1023 brw_reg = brw_abs(brw_reg);
1024 if (reg->negate)
1025 brw_reg = negate(brw_reg);
1026
1027 return brw_reg;
1028}
1029
/**
 * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant
 * sampler LD messages.
 *
 * We don't want to bake it into the send message's code generation because
 * that means we don't get a chance to schedule the instructions.
 */
void
fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
                                          struct brw_reg dst,
                                          struct brw_reg value)
{
   assert(value.file == BRW_IMMEDIATE_VALUE);

   /* Write only the first dword of dst, uncompressed and unmasked. */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
   brw_pop_insn_state(p);
}
1050
Chad Versace20dfa502013-01-09 11:46:42 -08001051/**
1052 * Change the register's data type from UD to W, doubling the strides in order
1053 * to compensate for halving the data type width.
1054 */
1055static struct brw_reg
1056ud_reg_to_w(struct brw_reg r)
1057{
1058 assert(r.type == BRW_REGISTER_TYPE_UD);
1059 r.type = BRW_REGISTER_TYPE_W;
1060
1061 /* The BRW_*_STRIDE enums are defined so that incrementing the field
1062 * doubles the real stride.
1063 */
1064 if (r.hstride != 0)
1065 ++r.hstride;
1066 if (r.vstride != 0)
1067 ++r.vstride;
1068
1069 return r;
1070}
1071
/**
 * Emit the gen7+ instruction sequence for packHalf2x16: convert the two F32
 * sources \p x and \p y to F16 and pack them into the low/high halves of each
 * 32-bit channel of \p dst.
 */
void
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
                                            struct brw_reg dst,
                                            struct brw_reg x,
                                            struct brw_reg y)
{
   assert(brw->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_UD);
   assert(x.type == BRW_REGISTER_TYPE_F);
   assert(y.type == BRW_REGISTER_TYPE_F);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
    *
    *   Because this instruction does not have a 16-bit floating-point type,
    *   the destination data type must be Word (W).
    *
    *   The destination must be DWord-aligned and specify a horizontal stride
    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
    *   each destination channel and the upper word is not modified.
    */
   struct brw_reg dst_w = ud_reg_to_w(dst);

   /* Give each 32-bit channel of dst the form below , where "." means
    * unchanged.
    *   0x....hhhh
    */
   brw_F32TO16(p, dst_w, y);

   /* Now the form:
    *   0xhhhh0000
    */
   brw_SHL(p, dst, dst, brw_imm_ud(16u));

   /* And, finally the form of packHalf2x16's output:
    *   0xhhhhllll
    */
   brw_F32TO16(p, dst_w, x);
}
1110
/**
 * Emit the gen7+ instruction for one component of unpackHalf2x16: convert
 * either the low (SPLIT_X) or high (SPLIT_Y) half-float word of each 32-bit
 * channel of \p src to F32 in \p dst.
 */
void
fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
                                              struct brw_reg dst,
                                              struct brw_reg src)
{
   assert(brw->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_F);
   assert(src.type == BRW_REGISTER_TYPE_UD);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
    *
    *   Because this instruction does not have a 16-bit floating-point type,
    *   the source data type must be Word (W). The destination type must be
    *   F (Float).
    */
   struct brw_reg src_w = ud_reg_to_w(src);

   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
    * For the Y case, we wish to access only the upper word; therefore
    * a 16-bit subregister offset is needed.
    */
   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
      src_w.subnr += 2;

   brw_F16TO32(p, dst, src_w);
}
1139
Eric Anholt11dd9e92011-05-24 16:34:27 -07001140void
Eric Anholt5c5218e2013-03-19 15:28:11 -07001141fs_generator::generate_shader_time_add(fs_inst *inst,
1142 struct brw_reg payload,
1143 struct brw_reg offset,
1144 struct brw_reg value)
1145{
Kenneth Graunke53631be2013-07-06 00:36:46 -07001146 assert(brw->gen >= 7);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001147 brw_push_insn_state(p);
1148 brw_set_mask_control(p, true);
1149
1150 assert(payload.file == BRW_GENERAL_REGISTER_FILE);
1151 struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
1152 offset.type);
1153 struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
1154 value.type);
1155
1156 assert(offset.file == BRW_IMMEDIATE_VALUE);
1157 if (value.file == BRW_GENERAL_REGISTER_FILE) {
1158 value.width = BRW_WIDTH_1;
1159 value.hstride = BRW_HORIZONTAL_STRIDE_0;
1160 value.vstride = BRW_VERTICAL_STRIDE_0;
1161 } else {
1162 assert(value.file == BRW_IMMEDIATE_VALUE);
1163 }
1164
1165 /* Trying to deal with setup of the params from the IR is crazy in the FS8
1166 * case, and we don't really care about squeezing every bit of performance
1167 * out of this path, so we just emit the MOVs from here.
1168 */
1169 brw_MOV(p, payload_offset, offset);
1170 brw_MOV(p, payload_value, value);
Eric Anholt3c9dc2d2013-10-02 14:07:40 -07001171 brw_shader_time_add(p, payload,
1172 c->prog_data.base.binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001173 brw_pop_insn_state(p);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -07001174
Eric Anholt3c9dc2d2013-10-02 14:07:40 -07001175 mark_surface_used(c->prog_data.base.binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001176}
1177
1178void
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001179fs_generator::generate_code(exec_list *instructions)
Eric Anholt11dd9e92011-05-24 16:34:27 -07001180{
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001181 int last_native_insn_offset = p->next_insn_offset;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001182 const char *last_annotation_string = NULL;
Eric Anholt97615b22012-08-27 14:35:01 -07001183 const void *last_annotation_ir = NULL;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001184
Eric Anholt11dd9e92011-05-24 16:34:27 -07001185 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholt97615b22012-08-27 14:35:01 -07001186 if (shader) {
1187 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
Kenneth Graunkea303df82012-11-20 13:50:52 -08001188 prog->Name, dispatch_width);
Eric Anholt97615b22012-08-27 14:35:01 -07001189 } else {
1190 printf("Native code for fragment program %d (%d-wide dispatch):\n",
Kenneth Graunke1f740022012-11-20 14:41:21 -08001191 fp->Base.Id, dispatch_width);
Eric Anholt97615b22012-08-27 14:35:01 -07001192 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001193 }
1194
Eric Anholt7abfb672012-10-03 13:16:09 -07001195 cfg_t *cfg = NULL;
Eric Anholt080b1252012-04-10 12:01:50 -07001196 if (unlikely(INTEL_DEBUG & DEBUG_WM))
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001197 cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
Eric Anholt080b1252012-04-10 12:01:50 -07001198
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001199 foreach_list(node, instructions) {
Eric Anholt44ffb4a2011-07-29 11:52:39 -07001200 fs_inst *inst = (fs_inst *)node;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001201 struct brw_reg src[3], dst;
1202
1203 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholt080b1252012-04-10 12:01:50 -07001204 foreach_list(node, &cfg->block_list) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001205 bblock_link *link = (bblock_link *)node;
1206 bblock_t *block = link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001207
1208 if (block->start == inst) {
1209 printf(" START B%d", block->block_num);
1210 foreach_list(predecessor_node, &block->parents) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001211 bblock_link *predecessor_link =
1212 (bblock_link *)predecessor_node;
1213 bblock_t *predecessor_block = predecessor_link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001214 printf(" <-B%d", predecessor_block->block_num);
1215 }
1216 printf("\n");
1217 }
1218 }
1219
Eric Anholt11dd9e92011-05-24 16:34:27 -07001220 if (last_annotation_ir != inst->ir) {
1221 last_annotation_ir = inst->ir;
1222 if (last_annotation_ir) {
1223 printf(" ");
Eric Anholt97615b22012-08-27 14:35:01 -07001224 if (shader)
1225 ((ir_instruction *)inst->ir)->print();
1226 else {
1227 const prog_instruction *fpi;
1228 fpi = (const prog_instruction *)inst->ir;
1229 printf("%d: ", (int)(fpi - fp->Base.Instructions));
1230 _mesa_fprint_instruction_opt(stdout,
1231 fpi,
1232 0, PROG_PRINT_DEBUG, NULL);
1233 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001234 printf("\n");
1235 }
1236 }
1237 if (last_annotation_string != inst->annotation) {
1238 last_annotation_string = inst->annotation;
1239 if (last_annotation_string)
1240 printf(" %s\n", last_annotation_string);
1241 }
1242 }
1243
1244 for (unsigned int i = 0; i < 3; i++) {
1245 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
Eric Anholt73b0a282011-10-03 15:12:10 -07001246
1247 /* The accumulator result appears to get used for the
1248 * conditional modifier generation. When negating a UD
1249 * value, there is a 33rd bit generated for the sign in the
1250 * accumulator value, so now you can't check, for example,
1251 * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
1252 */
1253 assert(!inst->conditional_mod ||
1254 inst->src[i].type != BRW_REGISTER_TYPE_UD ||
1255 !inst->src[i].negate);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001256 }
1257 dst = brw_reg_from_fs_reg(&inst->dst);
1258
1259 brw_set_conditionalmod(p, inst->conditional_mod);
Eric Anholt54679fc2012-10-03 13:23:05 -07001260 brw_set_predicate_control(p, inst->predicate);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001261 brw_set_predicate_inverse(p, inst->predicate_inverse);
Eric Anholtb278f652012-12-06 10:36:11 -08001262 brw_set_flag_reg(p, 0, inst->flag_subreg);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001263 brw_set_saturate(p, inst->saturate);
Eric Anholtef2fbf62012-11-28 14:16:03 -08001264 brw_set_mask_control(p, inst->force_writemask_all);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001265
Kenneth Graunkea303df82012-11-20 13:50:52 -08001266 if (inst->force_uncompressed || dispatch_width == 8) {
Eric Anholt11dd9e92011-05-24 16:34:27 -07001267 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1268 } else if (inst->force_sechalf) {
1269 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1270 } else {
1271 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1272 }
1273
1274 switch (inst->opcode) {
1275 case BRW_OPCODE_MOV:
1276 brw_MOV(p, dst, src[0]);
1277 break;
1278 case BRW_OPCODE_ADD:
1279 brw_ADD(p, dst, src[0], src[1]);
1280 break;
1281 case BRW_OPCODE_MUL:
1282 brw_MUL(p, dst, src[0], src[1]);
1283 break;
Eric Anholt3f78f712011-08-15 22:36:18 -07001284 case BRW_OPCODE_MACH:
1285 brw_set_acc_write_control(p, 1);
1286 brw_MACH(p, dst, src[0], src[1]);
1287 brw_set_acc_write_control(p, 0);
1288 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001289
Eric Anholt7d55f372012-02-07 00:59:11 +01001290 case BRW_OPCODE_MAD:
Matt Turner69909c82013-09-19 22:55:24 -07001291 assert(brw->gen >= 6);
Eric Anholt7d55f372012-02-07 00:59:11 +01001292 brw_set_access_mode(p, BRW_ALIGN_16);
Kenneth Graunkea303df82012-11-20 13:50:52 -08001293 if (dispatch_width == 16) {
Eric Anholt7d55f372012-02-07 00:59:11 +01001294 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1295 brw_MAD(p, dst, src[0], src[1], src[2]);
1296 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1297 brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1298 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1299 } else {
1300 brw_MAD(p, dst, src[0], src[1], src[2]);
1301 }
1302 brw_set_access_mode(p, BRW_ALIGN_1);
1303 break;
1304
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001305 case BRW_OPCODE_LRP:
Matt Turner69909c82013-09-19 22:55:24 -07001306 assert(brw->gen >= 6);
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001307 brw_set_access_mode(p, BRW_ALIGN_16);
1308 if (dispatch_width == 16) {
1309 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1310 brw_LRP(p, dst, src[0], src[1], src[2]);
1311 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1312 brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1313 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1314 } else {
1315 brw_LRP(p, dst, src[0], src[1], src[2]);
1316 }
1317 brw_set_access_mode(p, BRW_ALIGN_1);
1318 break;
1319
Eric Anholt11dd9e92011-05-24 16:34:27 -07001320 case BRW_OPCODE_FRC:
1321 brw_FRC(p, dst, src[0]);
1322 break;
1323 case BRW_OPCODE_RNDD:
1324 brw_RNDD(p, dst, src[0]);
1325 break;
1326 case BRW_OPCODE_RNDE:
1327 brw_RNDE(p, dst, src[0]);
1328 break;
1329 case BRW_OPCODE_RNDZ:
1330 brw_RNDZ(p, dst, src[0]);
1331 break;
1332
1333 case BRW_OPCODE_AND:
1334 brw_AND(p, dst, src[0], src[1]);
1335 break;
1336 case BRW_OPCODE_OR:
1337 brw_OR(p, dst, src[0], src[1]);
1338 break;
1339 case BRW_OPCODE_XOR:
1340 brw_XOR(p, dst, src[0], src[1]);
1341 break;
1342 case BRW_OPCODE_NOT:
1343 brw_NOT(p, dst, src[0]);
1344 break;
1345 case BRW_OPCODE_ASR:
1346 brw_ASR(p, dst, src[0], src[1]);
1347 break;
1348 case BRW_OPCODE_SHR:
1349 brw_SHR(p, dst, src[0], src[1]);
1350 break;
1351 case BRW_OPCODE_SHL:
1352 brw_SHL(p, dst, src[0], src[1]);
1353 break;
Chad Versace20dfa502013-01-09 11:46:42 -08001354 case BRW_OPCODE_F32TO16:
Matt Turner69909c82013-09-19 22:55:24 -07001355 assert(brw->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001356 brw_F32TO16(p, dst, src[0]);
1357 break;
1358 case BRW_OPCODE_F16TO32:
Matt Turner69909c82013-09-19 22:55:24 -07001359 assert(brw->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001360 brw_F16TO32(p, dst, src[0]);
1361 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001362 case BRW_OPCODE_CMP:
1363 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1364 break;
1365 case BRW_OPCODE_SEL:
1366 brw_SEL(p, dst, src[0], src[1]);
1367 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001368 case BRW_OPCODE_BFREV:
Matt Turner69909c82013-09-19 22:55:24 -07001369 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001370 /* BFREV only supports UD type for src and dst. */
1371 brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
1372 retype(src[0], BRW_REGISTER_TYPE_UD));
1373 break;
1374 case BRW_OPCODE_FBH:
Matt Turner69909c82013-09-19 22:55:24 -07001375 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001376 /* FBH only supports UD type for dst. */
1377 brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1378 break;
1379 case BRW_OPCODE_FBL:
Matt Turner69909c82013-09-19 22:55:24 -07001380 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001381 /* FBL only supports UD type for dst. */
1382 brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1383 break;
1384 case BRW_OPCODE_CBIT:
Matt Turner69909c82013-09-19 22:55:24 -07001385 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001386 /* CBIT only supports UD type for dst. */
1387 brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1388 break;
Matt Turner014cce32013-09-19 13:01:08 -07001389 case BRW_OPCODE_ADDC:
1390 assert(brw->gen >= 7);
1391 brw_set_acc_write_control(p, 1);
1392 brw_ADDC(p, dst, src[0], src[1]);
1393 brw_set_acc_write_control(p, 0);
1394 break;
1395 case BRW_OPCODE_SUBB:
1396 assert(brw->gen >= 7);
1397 brw_set_acc_write_control(p, 1);
1398 brw_SUBB(p, dst, src[0], src[1]);
1399 brw_set_acc_write_control(p, 0);
1400 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001401
1402 case BRW_OPCODE_BFE:
Matt Turner69909c82013-09-19 22:55:24 -07001403 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001404 brw_set_access_mode(p, BRW_ALIGN_16);
1405 if (dispatch_width == 16) {
1406 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1407 brw_BFE(p, dst, src[0], src[1], src[2]);
1408 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1409 brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1410 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1411 } else {
1412 brw_BFE(p, dst, src[0], src[1], src[2]);
1413 }
1414 brw_set_access_mode(p, BRW_ALIGN_1);
1415 break;
1416
1417 case BRW_OPCODE_BFI1:
Matt Turner69909c82013-09-19 22:55:24 -07001418 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001419 brw_BFI1(p, dst, src[0], src[1]);
1420 break;
1421 case BRW_OPCODE_BFI2:
Matt Turner69909c82013-09-19 22:55:24 -07001422 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001423 brw_set_access_mode(p, BRW_ALIGN_16);
1424 if (dispatch_width == 16) {
1425 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1426 brw_BFI2(p, dst, src[0], src[1], src[2]);
1427 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1428 brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1429 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1430 } else {
1431 brw_BFI2(p, dst, src[0], src[1], src[2]);
1432 }
1433 brw_set_access_mode(p, BRW_ALIGN_1);
1434 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001435
1436 case BRW_OPCODE_IF:
1437 if (inst->src[0].file != BAD_FILE) {
1438 /* The instruction has an embedded compare (only allowed on gen6) */
Kenneth Graunke53631be2013-07-06 00:36:46 -07001439 assert(brw->gen == 6);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001440 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
1441 } else {
Kenneth Graunkea303df82012-11-20 13:50:52 -08001442 brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001443 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001444 break;
1445
1446 case BRW_OPCODE_ELSE:
1447 brw_ELSE(p);
1448 break;
1449 case BRW_OPCODE_ENDIF:
1450 brw_ENDIF(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001451 break;
1452
1453 case BRW_OPCODE_DO:
Eric Anholtce6be332011-12-06 12:30:03 -08001454 brw_DO(p, BRW_EXECUTE_8);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001455 break;
1456
1457 case BRW_OPCODE_BREAK:
Eric Anholtf1d89632011-12-06 12:44:41 -08001458 brw_BREAK(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001459 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1460 break;
1461 case BRW_OPCODE_CONTINUE:
1462 /* FINISHME: We need to write the loop instruction support still. */
Kenneth Graunke53631be2013-07-06 00:36:46 -07001463 if (brw->gen >= 6)
Eric Anholt9f881472011-12-06 12:09:58 -08001464 gen6_CONT(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001465 else
Eric Anholtf1d89632011-12-06 12:44:41 -08001466 brw_CONT(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001467 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1468 break;
1469
Eric Anholtce6be332011-12-06 12:30:03 -08001470 case BRW_OPCODE_WHILE:
Eric Anholtce6be332011-12-06 12:30:03 -08001471 brw_WHILE(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001472 break;
1473
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001474 case SHADER_OPCODE_RCP:
1475 case SHADER_OPCODE_RSQ:
1476 case SHADER_OPCODE_SQRT:
1477 case SHADER_OPCODE_EXP2:
1478 case SHADER_OPCODE_LOG2:
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001479 case SHADER_OPCODE_SIN:
1480 case SHADER_OPCODE_COS:
Kenneth Graunke53631be2013-07-06 00:36:46 -07001481 if (brw->gen >= 7) {
Kenneth Graunkea73c65c2011-10-18 12:24:47 -07001482 generate_math1_gen7(inst, dst, src[0]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001483 } else if (brw->gen == 6) {
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001484 generate_math1_gen6(inst, dst, src[0]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001485 } else if (brw->gen == 5 || brw->is_g4x) {
Kenneth Graunke1b77d212013-03-30 00:15:54 -07001486 generate_math_g45(inst, dst, src[0]);
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001487 } else {
1488 generate_math_gen4(inst, dst, src[0]);
1489 }
1490 break;
Kenneth Graunkeff8f2722011-09-28 17:37:54 -07001491 case SHADER_OPCODE_INT_QUOTIENT:
1492 case SHADER_OPCODE_INT_REMAINDER:
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001493 case SHADER_OPCODE_POW:
Kenneth Graunke53631be2013-07-06 00:36:46 -07001494 if (brw->gen >= 7) {
Kenneth Graunkea73c65c2011-10-18 12:24:47 -07001495 generate_math2_gen7(inst, dst, src[0], src[1]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001496 } else if (brw->gen == 6) {
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001497 generate_math2_gen6(inst, dst, src[0], src[1]);
1498 } else {
1499 generate_math_gen4(inst, dst, src[0]);
1500 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001501 break;
1502 case FS_OPCODE_PIXEL_X:
1503 generate_pixel_xy(dst, true);
1504 break;
1505 case FS_OPCODE_PIXEL_Y:
1506 generate_pixel_xy(dst, false);
1507 break;
1508 case FS_OPCODE_CINTERP:
1509 brw_MOV(p, dst, src[0]);
1510 break;
1511 case FS_OPCODE_LINTERP:
1512 generate_linterp(inst, dst, src);
1513 break;
Kenneth Graunkefebad172011-10-26 12:58:37 -07001514 case SHADER_OPCODE_TEX:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001515 case FS_OPCODE_TXB:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001516 case SHADER_OPCODE_TXD:
1517 case SHADER_OPCODE_TXF:
Chris Forbesf52ce6a2013-01-24 21:35:15 +13001518 case SHADER_OPCODE_TXF_MS:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001519 case SHADER_OPCODE_TXL:
1520 case SHADER_OPCODE_TXS:
Matt Turnerb8aa9f72013-03-06 14:47:01 -08001521 case SHADER_OPCODE_LOD:
Chris Forbesfb455502013-03-31 21:31:12 +13001522 case SHADER_OPCODE_TG4:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001523 generate_tex(inst, dst, src[0]);
1524 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001525 case FS_OPCODE_DDX:
1526 generate_ddx(inst, dst, src[0]);
1527 break;
1528 case FS_OPCODE_DDY:
Paul Berryd08fdac2012-06-20 13:40:45 -07001529 /* Make sure fp->UsesDFdy flag got set (otherwise there's no
1530 * guarantee that c->key.render_to_fbo is set).
1531 */
1532 assert(fp->UsesDFdy);
Paul Berry82d25962012-06-20 13:40:45 -07001533 generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001534 break;
1535
1536 case FS_OPCODE_SPILL:
1537 generate_spill(inst, src[0]);
1538 break;
1539
1540 case FS_OPCODE_UNSPILL:
1541 generate_unspill(inst, dst);
1542 break;
1543
Eric Anholt29340d02012-11-07 10:42:34 -08001544 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
1545 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001546 break;
1547
Eric Anholt461a2972012-12-05 00:06:30 -08001548 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
1549 generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1550 break;
1551
Eric Anholtd8214e42012-11-07 11:18:34 -08001552 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
Eric Anholt70b27e02013-03-18 10:16:42 -07001553 generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
Eric Anholtd8214e42012-11-07 11:18:34 -08001554 break;
1555
1556 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
1557 generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1558 break;
1559
Eric Anholt11dd9e92011-05-24 16:34:27 -07001560 case FS_OPCODE_FB_WRITE:
1561 generate_fb_write(inst);
1562 break;
Paul Berry3f929ef2012-06-18 14:50:04 -07001563
1564 case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
Eric Anholtb278f652012-12-06 10:36:11 -08001565 generate_mov_dispatch_to_flags(inst);
Paul Berry3f929ef2012-06-18 14:50:04 -07001566 break;
1567
Eric Anholtbeafced2012-12-06 10:15:08 -08001568 case FS_OPCODE_DISCARD_JUMP:
1569 generate_discard_jump(inst);
1570 break;
1571
Eric Anholt71f06342012-11-27 14:10:52 -08001572 case SHADER_OPCODE_SHADER_TIME_ADD:
Eric Anholt5c5218e2013-03-19 15:28:11 -07001573 generate_shader_time_add(inst, src[0], src[1], src[2]);
Eric Anholt71f06342012-11-27 14:10:52 -08001574 break;
1575
Eric Anholt4c1fdae2013-03-06 14:47:22 -08001576 case FS_OPCODE_SET_SIMD4X2_OFFSET:
1577 generate_set_simd4x2_offset(inst, dst, src[0]);
Eric Anholt461a2972012-12-05 00:06:30 -08001578 break;
1579
Chad Versace20dfa502013-01-09 11:46:42 -08001580 case FS_OPCODE_PACK_HALF_2x16_SPLIT:
1581 generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
1582 break;
1583
1584 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
1585 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
1586 generate_unpack_half_2x16_split(inst, dst, src[0]);
1587 break;
1588
Kenneth Graunke57a50252013-03-27 23:19:39 -07001589 case FS_OPCODE_PLACEHOLDER_HALT:
1590 /* This is the place where the final HALT needs to be inserted if
1591 * we've emitted any discards. If not, this will emit no code.
1592 */
1593 patch_discard_jumps_to_fb_writes();
1594 break;
1595
Eric Anholt11dd9e92011-05-24 16:34:27 -07001596 default:
Kenneth Graunkeb02492f2012-11-14 14:24:31 -08001597 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
Eric Anholt11dd9e92011-05-24 16:34:27 -07001598 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
Kenneth Graunkeb02492f2012-11-14 14:24:31 -08001599 opcode_descs[inst->opcode].name);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001600 } else {
1601 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
1602 }
Kenneth Graunkedd1fd302012-11-20 17:02:23 -08001603 abort();
Eric Anholt11dd9e92011-05-24 16:34:27 -07001604 }
1605
1606 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001607 brw_dump_compile(p, stdout,
1608 last_native_insn_offset, p->next_insn_offset);
Eric Anholt080b1252012-04-10 12:01:50 -07001609
1610 foreach_list(node, &cfg->block_list) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001611 bblock_link *link = (bblock_link *)node;
1612 bblock_t *block = link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001613
1614 if (block->end == inst) {
1615 printf(" END B%d", block->block_num);
1616 foreach_list(successor_node, &block->children) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001617 bblock_link *successor_link =
1618 (bblock_link *)successor_node;
1619 bblock_t *successor_block = successor_link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001620 printf(" ->B%d", successor_block->block_num);
1621 }
1622 printf("\n");
1623 }
1624 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001625 }
1626
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001627 last_native_insn_offset = p->next_insn_offset;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001628 }
1629
1630 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1631 printf("\n");
1632 }
1633
Eric Anholt11dd9e92011-05-24 16:34:27 -07001634 brw_set_uip_jip(p);
1635
1636 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
1637 * emit issues, it doesn't get the jump distances into the output,
1638 * which is often something we want to debug. So this is here in
1639 * case you're doing that.
1640 */
1641 if (0) {
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001642 brw_dump_compile(p, stdout, 0, p->next_insn_offset);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001643 }
1644}
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001645
1646const unsigned *
1647fs_generator::generate_assembly(exec_list *simd8_instructions,
1648 exec_list *simd16_instructions,
1649 unsigned *assembly_size)
1650{
1651 dispatch_width = 8;
1652 generate_code(simd8_instructions);
1653
1654 if (simd16_instructions) {
1655 /* We have to do a compaction pass now, or the one at the end of
1656 * execution will squash down where our prog_offset start needs
1657 * to be.
1658 */
1659 brw_compact_instructions(p);
1660
1661 /* align to 64 byte boundary. */
1662 while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) {
1663 brw_NOP(p);
1664 }
1665
1666 /* Save off the start of this 16-wide program */
1667 c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
1668
1669 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1670
1671 dispatch_width = 16;
1672 generate_code(simd16_instructions);
1673 }
1674
1675 return brw_get_program(p, assembly_size);
1676}