blob: 0e6e8533c732e8316addc96ebda76dbe449ae2bd [file] [log] [blame]
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
/** @file brw_fs_generator.cpp
 *
 * This file supports generating code from the FS LIR to the actual
 * native instructions.
 */
29
30extern "C" {
31#include "main/macros.h"
32#include "brw_context.h"
33#include "brw_eu.h"
34} /* extern "C" */
35
36#include "brw_fs.h"
Eric Anholt5ed57d92012-10-03 13:03:12 -070037#include "brw_cfg.h"
Eric Anholt11dd9e92011-05-24 16:34:27 -070038
/**
 * Construct a code generator for one fragment-shader compile.
 *
 * @param c                  the WM compile state; also serves as the ralloc
 *                           memory context for everything allocated here.
 * @param prog               the linked GLSL program, or NULL (fixed function).
 * @param fp                 the gl_fragment_program being compiled.
 * @param dual_source_output selects the dual-source render-target write
 *                           message variant in generate_fb_write().
 */
fs_generator::fs_generator(struct brw_context *brw,
                           struct brw_wm_compile *c,
                           struct gl_shader_program *prog,
                           struct gl_fragment_program *fp,
                           bool dual_source_output)

   : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
{
   ctx = &brw->ctx;

   /* NULL when no GLSL program was provided. */
   shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;

   /* Parent all generator allocations (including the brw_compile state
    * below) to the compile, so they are released together with it.
    */
   mem_ctx = c;

   p = rzalloc(mem_ctx, struct brw_compile);
   brw_init_compile(brw, p, mem_ctx);
}
56
fs_generator::~fs_generator()
{
   /* Nothing to release explicitly; state is allocated out of mem_ctx
    * (presumably freed with the compile — see the constructor).
    */
}
60
Eric Anholt11dd9e92011-05-24 16:34:27 -070061void
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070062fs_generator::mark_surface_used(unsigned surf_index)
63{
Eric Anholt4e530642013-10-02 18:53:04 -070064 assert(surf_index < BRW_MAX_SURFACES);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070065
Eric Anholt3c9dc2d2013-10-02 14:07:40 -070066 c->prog_data.base.binding_table.size_bytes =
67 MAX2(c->prog_data.base.binding_table.size_bytes, (surf_index + 1) * 4);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -070068}
69
/**
 * Emit a final HALT and retarget every discard HALT recorded by
 * generate_discard_jump() so its UIP points at it.
 *
 * No-op before gen6 or when no discard jumps were emitted.
 */
void
fs_generator::patch_discard_jumps_to_fb_writes()
{
   if (brw->gen < 6 || this->discard_halt_patches.is_empty())
      return;

   /* There is a somewhat strange undocumented requirement of using
    * HALT, according to the simulator.  If some channel has HALTed to
    * a particular UIP, then by the end of the program, every channel
    * must have HALTed to that UIP.  Furthermore, the tracking is a
    * stack, so you can't do the final halt of a UIP after starting
    * halting to a new UIP.
    *
    * Symptoms of not emitting this instruction on actual hardware
    * included GPU hangs and sparkly rendering on the piglit discard
    * tests.
    */
   struct brw_instruction *last_halt = gen6_HALT(p);
   /* The final HALT targets itself: both offsets are in half-instruction
    * units, so 2 == "the next instruction".
    */
   last_halt->bits3.break_cont.uip = 2;
   last_halt->bits3.break_cont.jip = 2;

   int ip = p->nr_insn;

   /* Point each recorded discard HALT's UIP at the final HALT above. */
   foreach_list(node, &this->discard_halt_patches) {
      ip_record *patch_ip = (ip_record *)node;
      struct brw_instruction *patch = &p->store[patch_ip->ip];

      assert(patch->header.opcode == BRW_OPCODE_HALT);
      /* HALT takes a half-instruction distance from the pre-incremented IP. */
      patch->bits3.break_cont.uip = (ip - patch_ip->ip) * 2;
   }

   this->discard_halt_patches.make_empty();
}
104
/**
 * Emit the render-target write (FB write) send for \p inst.
 *
 * When a message header is present, builds it under mask-disabled,
 * uncompressed execution: an implied-move/copy of g0, the optional
 * "Source0 Alpha Present" bit, and the render-target index used to pick
 * BLEND_STATE.  When the program uses discard/kill, flag register f0.1 is
 * first saved into the payload's pixel-mask slot.
 */
void
fs_generator::generate_fb_write(fs_inst *inst)
{
   bool eot = inst->eot;
   struct brw_reg implied_header;
   uint32_t msg_control;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (fp->UsesKill) {
      struct brw_reg pixel_mask;

      /* The pixel-mask location in the payload differs per generation. */
      if (brw->gen >= 6)
         pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      else
         pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      /* Save the kill-updated channel mask from f0.1 into the payload. */
      brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
   }

   if (inst->header_present) {
      if (brw->gen >= 6) {
         /* Copy g0 into the message header (compressed: both halves). */
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0 && c->key.replicate_alpha) {
            /* Set "Source0 Alpha Present to RenderTarget" bit in message
             * header.
             */
            brw_OR(p,
                   vec1(retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD)),
                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
                   brw_imm_ud(0x1 << 11));
         }

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                           inst->base_mrf, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         /* Pre-gen6: g0 is implied-moved by the send; copy g1 explicitly. */
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   /* Pick the data-port message control for the write layout. */
   if (this->dual_source_output)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
   else if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_pop_insn_state(p);

   uint32_t surf_index =
      c->prog_data.binding_table.render_target_start + inst->target;
   brw_fb_WRITE(p,
                dispatch_width,
                inst->base_mrf,
                implied_header,
                msg_control,
                surf_index,
                inst->mlen,
                0,
                eot,
                inst->header_present);

   mark_surface_used(surf_index);
}
192
/* Computes the integer pixel x,y values from the origin.
 *
 * This is the basis of gl_FragCoord computation, but is also used
 * pre-gen6 for computing the deltas from v0 for computing
 * interpolation.
 */
void
fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
   /* Assumes g1 holds the subspan screen-origin X/Y coordinates from the
    * thread payload (UW channels 4/5) — per the payload layout; confirm
    * against the PRM.
    */
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg src;
   struct brw_reg deltas;

   if (is_x) {
      src = stride(suboffset(g1_uw, 4), 2, 4, 0);
      deltas = brw_imm_v(0x10101010);   /* per-channel X offsets within a subspan */
   } else {
      src = stride(suboffset(g1_uw, 5), 2, 4, 0);
      deltas = brw_imm_v(0x11001100);   /* per-channel Y offsets within a subspan */
   }

   if (dispatch_width == 16) {
      dst = vec16(dst);
   }

   /* We do this 8 or 16-wide, but since the destination is UW we
    * don't do compression in the 16-wide case.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_ADD(p, dst, src, deltas);
   brw_pop_insn_state(p);
}
226
227void
Kenneth Graunkeea681a02012-11-09 01:05:47 -0800228fs_generator::generate_linterp(fs_inst *inst,
Eric Anholt11dd9e92011-05-24 16:34:27 -0700229 struct brw_reg dst, struct brw_reg *src)
230{
231 struct brw_reg delta_x = src[0];
232 struct brw_reg delta_y = src[1];
233 struct brw_reg interp = src[2];
234
235 if (brw->has_pln &&
236 delta_y.nr == delta_x.nr + 1 &&
Kenneth Graunke53631be2013-07-06 00:36:46 -0700237 (brw->gen >= 6 || (delta_x.nr & 1) == 0)) {
Eric Anholt11dd9e92011-05-24 16:34:27 -0700238 brw_PLN(p, dst, interp, delta_x);
239 } else {
240 brw_LINE(p, brw_null_reg(), interp, delta_x);
241 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
242 }
243}
244
245void
Kenneth Graunkeea681a02012-11-09 01:05:47 -0800246fs_generator::generate_math1_gen7(fs_inst *inst,
Kenneth Graunkea73c65c2011-10-18 12:24:47 -0700247 struct brw_reg dst,
248 struct brw_reg src0)
249{
250 assert(inst->mlen == 0);
251 brw_math(p, dst,
252 brw_math_function(inst->opcode),
Kenneth Graunkea73c65c2011-10-18 12:24:47 -0700253 0, src0,
254 BRW_MATH_DATA_VECTOR,
255 BRW_MATH_PRECISION_FULL);
256}
257
258void
Kenneth Graunkeea681a02012-11-09 01:05:47 -0800259fs_generator::generate_math2_gen7(fs_inst *inst,
Kenneth Graunkea73c65c2011-10-18 12:24:47 -0700260 struct brw_reg dst,
261 struct brw_reg src0,
262 struct brw_reg src1)
263{
264 assert(inst->mlen == 0);
265 brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
266}
267
/**
 * Emit a one-source math instruction on gen6.
 *
 * The instruction is emitted uncompressed; a SIMD16 dispatch is realized as
 * two SIMD8 instructions, the second operating on the sechalf registers.
 */
void
fs_generator::generate_math1_gen6(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0)
{
   int op = brw_math_function(inst->opcode);

   /* No message registers are involved on gen6. */
   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (dispatch_width == 16) {
      /* Second SIMD8 instruction for the upper half of the channels. */
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
294
/**
 * Emit a two-source math instruction on gen6.
 *
 * As with the one-source variant, SIMD16 is split into two SIMD8
 * instructions, the second operating on the sechalf registers.
 */
void
fs_generator::generate_math2_gen6(fs_inst *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);

   /* No message registers are involved on gen6. */
   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math2(p, dst, op, src0, src1);

   if (dispatch_width == 16) {
      /* Second SIMD8 instruction for the upper half of the channels. */
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
314
/**
 * Emit a math instruction on gen4, where math is a message-based send
 * whose operands live in message registers (hence mlen >= 1).
 *
 * SIMD16 is split into two SIMD8 sends, the second using base_mrf + 1 and
 * the sechalf registers.
 */
void
fs_generator::generate_math_gen4(fs_inst *inst,
                                 struct brw_reg dst,
                                 struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (dispatch_width == 16) {
      /* Second SIMD8 send for the upper half of the channels. */
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
342
/**
 * Emit a math instruction on G45.
 *
 * POW, INT_QUOTIENT and INT_REMAINDER are delegated to the gen4 path;
 * everything else is a single (non-split) message-based math send.
 */
void
fs_generator::generate_math_g45(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src)
{
   if (inst->opcode == SHADER_OPCODE_POW ||
       inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
       inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
      generate_math_gen4(inst, dst, src);
      return;
   }

   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_math(p, dst,
            op,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
365
/**
 * Emit the sampler send for a texturing instruction.
 *
 * Translates the LIR opcode into the hardware sampler message type (with
 * separate tables for gen5+ and gen4), picks the SIMD mode, return format
 * and response length, sets up the optional message header (texture
 * offsets, gen7 send-from-GRF adjustments), then emits brw_SAMPLE.
 */
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   uint32_t return_format;

   /* The sampler's return format follows the destination register type. */
   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   /* A forced-uncompressed or second-half instruction stays SIMD8 even in
    * a 16-wide dispatch.
    */
   if (dispatch_width == 16 &&
       !inst->force_uncompressed && !inst->force_sechalf)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_LOD:
         msg_type = GEN5_SAMPLER_MESSAGE_LOD;
         break;
      case SHADER_OPCODE_TG4:
         assert(brw->gen >= 6);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         break;
      default:
         assert(!"not reached");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         assert(dispatch_width == 8);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
         } else {
            assert(inst->mlen <= 4);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case SHADER_OPCODE_TXF:
         assert(inst->mlen == 9);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case SHADER_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         assert(!"not reached");
         break;
      }
   }
   assert(msg_type != -1);

   /* SIMD16 responses are twice as long, and the destination is viewed as
    * the full 16-channel register pair.
    */
   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
      /* The send-from-GRF for 16-wide texturing with a header has an extra
       * hardware register allocated to it, which we need to skip over (since
       * our coordinates in the payload are in the even-numbered registers,
       * and the header comes right before the first one).
       */
      assert(src.file == BRW_GENERAL_REGISTER_FILE);
      src.nr++;
   }

   /* Load the message header if present.  If there's a texture offset,
    * we need to set it up explicitly and load the offset bitfield.
    * Otherwise, we can use an implied move from g0 to the first message reg.
    */
   if (inst->texture_offset) {
      struct brw_reg header_reg;

      /* On gen7 the header lives in the source GRF; earlier it's an MRF. */
      if (brw->gen >= 7) {
         header_reg = src;
      } else {
         assert(inst->base_mrf != -1);
         header_reg = brw_message_reg(inst->base_mrf);
      }
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p, header_reg, brw_vec8_grf(0, 0));

      /* Then set the offset bits in DWord 2. */
      brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
                                     header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(inst->texture_offset));
      brw_pop_insn_state(p);
   } else if (inst->header_present) {
      if (brw->gen >= 7) {
         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_mask_control(p, BRW_MASK_DISABLE);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_MOV(p, src, brw_vec8_grf(0, 0));
         brw_pop_insn_state(p);
      } else {
         /* Set up an implied move from g0 to the MRF. */
         src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }
   }

   /* TG4 (gather) uses a separate binding-table range from plain sampling. */
   uint32_t surface_index = (inst->opcode == SHADER_OPCODE_TG4
      ? c->prog_data.base.binding_table.gather_texture_start
      : c->prog_data.base.binding_table.texture_start) + inst->sampler;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              surface_index,
              inst->sampler,
              msg_type,
              rlen,
              inst->mlen,
              inst->header_present,
              simd_mode,
              return_format);

   mark_surface_used(surface_index);
}
573
574
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * Ideally, we want to produce:
 *
 *            DDX                   DDY
 * dst: (ss0.tr - ss0.tl)    (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)    (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)    (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)    (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)    (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)    (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)    (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)    (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But the ideal approximation may impose a huge performance cost on
 * sample_d.  On at least Haswell, sample_d instruction does some
 * optimizations if the same LOD is used for all pixels in the subspan.
 *
 * For DDY, we need to use ALIGN16 mode since it's capable of doing the
 * appropriate swizzling.
 */
void
fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   unsigned vstride, width;

   if (c->key.high_quality_derivatives) {
      /* produce accurate derivatives */
      vstride = BRW_VERTICAL_STRIDE_2;
      width = BRW_WIDTH_2;
   }
   else {
      /* replicate the derivative at the top-left pixel to other pixels */
      vstride = BRW_VERTICAL_STRIDE_4;
      width = BRW_WIDTH_4;
   }

   /* src0 reads starting at the second element, src1 at the first, so the
    * ADD below computes right-minus-left per the table above.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 vstride,
                                 width,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 vstride,
                                 width,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
633
/* The negate_value boolean is used to negate the derivative computation for
 * FBOs, since they place the origin at the upper left instead of the lower
 * left.
 */
void
fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                           bool negate_value)
{
   if (c->key.high_quality_derivatives) {
      /* produce accurate derivatives: per-row differences via ALIGN16
       * swizzles (top row XYXY minus bottom row ZWZW).
       */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_1,
                                    BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_16);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      if (negate_value)
         brw_ADD(p, dst, src1, negate(src0));
      else
         brw_ADD(p, dst, src0, negate(src1));
      if (dispatch_width == 16) {
         /* From page 340 of the i965 PRM:
          *
          *     "A compressed instruction must be in Align1 access
          *      mode. Align16 mode instructions cannot be compressed."
          *
          * Therefore, when doing a 16-wide dispatch, we need to manually
          * unroll to two instructions.
          */
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         src0 = sechalf(src0);
         src1 = sechalf(src1);
         dst = sechalf(dst);
         if (negate_value)
            brw_ADD(p, dst, src1, negate(src0));
         else
            brw_ADD(p, dst, src0, negate(src1));
      }
      brw_pop_insn_state(p);
   } else {
      /* replicate the derivative at the top-left pixel to other pixels */
      struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                    BRW_REGISTER_TYPE_F,
                                    BRW_VERTICAL_STRIDE_4,
                                    BRW_WIDTH_4,
                                    BRW_HORIZONTAL_STRIDE_0,
                                    BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
      if (negate_value)
         brw_ADD(p, dst, src1, negate(src0));
      else
         brw_ADD(p, dst, src0, negate(src1));
   }
}
702
/**
 * Emit the HALT for a discard and record its instruction index so
 * patch_discard_jumps_to_fb_writes() can fix up its UIP later.
 */
void
fs_generator::generate_discard_jump(fs_inst *inst)
{
   assert(brw->gen >= 6);

   /* This HALT will be patched up at FB write time to point UIP at the end of
    * the program, and at brw_uip_jip() JIP will be set to the end of the
    * current block (or the program).
    */
   this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   gen6_HALT(p);
   brw_pop_insn_state(p);
}
719
/**
 * Spill a register to scratch space: copy \p src into the message payload
 * at base_mrf + 1, then emit an oword block write at inst->offset.
 */
void
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
           retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
                                 inst->offset);
}
731
/**
 * Reload a spilled register from scratch space via an oword block read
 * at inst->offset.
 */
void
fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
                                inst->offset);
}
740
/**
 * Load a uniform (push-constant-overflow) value with an oword block read.
 *
 * Both the surface index and the read offset must be immediates here;
 * the gen7 variant handles a register offset.
 */
void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(inst->mlen != 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_IMMEDIATE_VALUE &&
          offset.type == BRW_REGISTER_TYPE_UD);
   uint32_t read_offset = offset.dw1.ud;

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        read_offset, surf_index);

   mark_surface_used(surf_index);
}
762
/**
 * Gen7 uniform pull-constant load: a headerless send-from-GRF using the
 * sampler LD message in SIMD4x2 mode, with the offset taken from a GRF.
 */
void
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg offset)
{
   /* Send-from-GRF: no message registers used. */
   assert(inst->mlen == 0);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_GENERAL_REGISTER_FILE);
   /* Reference just the dword we need, to avoid angering validate_reg(). */
   offset = brw_vec1_grf(offset.nr, 0);

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use the SIMD4x2 mode because we want to end up with 4 components in
    * the destination loaded consecutively from the same offset (which appears
    * in the first component, and the rest are ignored).
    */
   dst.width = BRW_WIDTH_4;
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, offset);
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* LD message ignores sampler unit */
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           1, /* rlen */
                           1, /* mlen */
                           false, /* no header */
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                           0);

   mark_surface_used(surf_index);
}
804
/**
 * Generate a pre-gen7 varying-offset pull constant load.
 *
 * The per-channel offsets arrive in \p offset and are written into the
 * message payload; the load itself is performed with a sampler LD message
 * (with a header, hence inst->header_present).
 */
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset)
{
   assert(brw->gen < 7); /* Should use the gen7 variant. */
   assert(inst->header_present);
   assert(inst->mlen);

   /* The binding-table index must be an immediate so it can be baked into
    * the message descriptor.
    */
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   uint32_t simd_mode, rlen, msg_type;
   if (dispatch_width == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      rlen = 8;
   } else {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      rlen = 4;
   }

   if (brw->gen >= 5)
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
   else {
      /* We always use the SIMD16 message so that we only have to load U, and
       * not V or R.
       */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
      assert(inst->mlen == 3);
      assert(inst->regs_written == 8);
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   }

   /* Copy the per-channel offsets into the payload MRF following the
    * message header.
    */
   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
                                      BRW_REGISTER_TYPE_D);
   brw_MOV(p, offset_mrf, offset);

   /* g0 supplies the message header; gen6 moves it into the MRF implicitly
    * through the SEND's src0.
    */
   struct brw_reg header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   send->header.compression_control = BRW_COMPRESSION_NONE;
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;

   /* Our surface is set up as floats, regardless of what actual data is
    * stored in it.
    */
   uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* sampler (unused) */
                           msg_type,
                           rlen,
                           inst->mlen,
                           inst->header_present,
                           simd_mode,
                           return_format);

   /* Record surface usage for binding-table bookkeeping. */
   mark_surface_used(surf_index);
}
871
/**
 * Generate a gen7+ varying-offset pull constant load.
 *
 * On gen7 the per-channel offsets can be sent directly as the SEND's src0
 * (no header, no MRFs), so the load looks like a normal headerless sampler
 * LD message.
 */
void
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                       struct brw_reg dst,
                                                       struct brw_reg index,
                                                       struct brw_reg offset)
{
   assert(brw->gen >= 7);
   /* Varying-offset pull constant loads are treated as a normal expression on
    * gen7, so the fact that it's a send message is hidden at the IR level.
    */
   assert(!inst->header_present);
   assert(!inst->mlen);

   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   /* Message/response lengths scale with dispatch width (one GRF of offsets
    * per 8 channels, 4 response GRFs per 8 channels).
    */
   uint32_t simd_mode, rlen, mlen;
   if (dispatch_width == 16) {
      mlen = 2;
      rlen = 8;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mlen = 1;
      rlen = 4;
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, offset);
   brw_set_sampler_message(p, send,
                           surf_index,
                           0, /* LD message ignores sampler unit */
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           rlen,
                           mlen,
                           false, /* no header */
                           simd_mode,
                           0);

   /* Record surface usage for binding-table bookkeeping. */
   mark_surface_used(surf_index);
}
Paul Berry3f929ef2012-06-18 14:50:04 -0700915
/**
 * Cause the current pixel/sample mask to be transferred into the flags
 * register (f0.0 or the subregister selected by inst->flag_subreg).
 *
 * On Gen6+ the dispatch mask is read from R1.7 bits 15:0; on earlier
 * hardware it is read from g0.0 instead (note: the gen < 6 branch below
 * contradicts the original "Gen6 and above only" claim — the pre-Gen6
 * source register location should be double-checked against the PRM).
 */
void
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
{
   struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
   struct brw_reg dispatch_mask;

   if (brw->gen >= 6)
      dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
   else
      dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

   /* Disable execution masking so the MOV itself isn't affected by the very
    * mask we're trying to capture.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, flags, dispatch_mask);
   brw_pop_insn_state(p);
}
938
939
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800940static uint32_t brw_file_from_reg(fs_reg *reg)
941{
942 switch (reg->file) {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800943 case GRF:
944 return BRW_GENERAL_REGISTER_FILE;
945 case MRF:
946 return BRW_MESSAGE_REGISTER_FILE;
947 case IMM:
948 return BRW_IMMEDIATE_VALUE;
949 default:
950 assert(!"not reached");
951 return BRW_GENERAL_REGISTER_FILE;
952 }
953}
954
Eric Anholt11dd9e92011-05-24 16:34:27 -0700955static struct brw_reg
956brw_reg_from_fs_reg(fs_reg *reg)
957{
958 struct brw_reg brw_reg;
959
960 switch (reg->file) {
961 case GRF:
Eric Anholt11dd9e92011-05-24 16:34:27 -0700962 case MRF:
963 if (reg->smear == -1) {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800964 brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
Eric Anholt11dd9e92011-05-24 16:34:27 -0700965 } else {
Eric Anholta3b8c5e2011-11-23 10:13:39 -0800966 brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
Eric Anholt11dd9e92011-05-24 16:34:27 -0700967 }
968 brw_reg = retype(brw_reg, reg->type);
969 if (reg->sechalf)
970 brw_reg = sechalf(brw_reg);
971 break;
972 case IMM:
973 switch (reg->type) {
974 case BRW_REGISTER_TYPE_F:
975 brw_reg = brw_imm_f(reg->imm.f);
976 break;
977 case BRW_REGISTER_TYPE_D:
978 brw_reg = brw_imm_d(reg->imm.i);
979 break;
980 case BRW_REGISTER_TYPE_UD:
981 brw_reg = brw_imm_ud(reg->imm.u);
982 break;
983 default:
984 assert(!"not reached");
985 brw_reg = brw_null_reg();
986 break;
987 }
988 break;
Eric Anholtab04f3b2013-04-29 16:05:05 -0700989 case HW_REG:
Eric Anholt11dd9e92011-05-24 16:34:27 -0700990 brw_reg = reg->fixed_hw_reg;
991 break;
992 case BAD_FILE:
993 /* Probably unused. */
994 brw_reg = brw_null_reg();
995 break;
996 case UNIFORM:
997 assert(!"not reached");
998 brw_reg = brw_null_reg();
999 break;
1000 default:
1001 assert(!"not reached");
1002 brw_reg = brw_null_reg();
1003 break;
1004 }
1005 if (reg->abs)
1006 brw_reg = brw_abs(brw_reg);
1007 if (reg->negate)
1008 brw_reg = negate(brw_reg);
1009
1010 return brw_reg;
1011}
1012
/**
 * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant
 * sampler LD messages.
 *
 * We don't want to bake it into the send message's code generation because
 * that means we don't get a chance to schedule the instructions.
 */
void
fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
                                          struct brw_reg dst,
                                          struct brw_reg value)
{
   /* The offset is always a compile-time immediate. */
   assert(value.file == BRW_IMMEDIATE_VALUE);

   /* Write only dword 0 of dst, uncompressed and unmasked, so the MOV is a
    * single scalar write regardless of dispatch width or channel enables.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
   brw_pop_insn_state(p);
}
1033
Chad Versace20dfa502013-01-09 11:46:42 -08001034/**
1035 * Change the register's data type from UD to W, doubling the strides in order
1036 * to compensate for halving the data type width.
1037 */
1038static struct brw_reg
1039ud_reg_to_w(struct brw_reg r)
1040{
1041 assert(r.type == BRW_REGISTER_TYPE_UD);
1042 r.type = BRW_REGISTER_TYPE_W;
1043
1044 /* The BRW_*_STRIDE enums are defined so that incrementing the field
1045 * doubles the real stride.
1046 */
1047 if (r.hstride != 0)
1048 ++r.hstride;
1049 if (r.vstride != 0)
1050 ++r.vstride;
1051
1052 return r;
1053}
1054
/**
 * Generate code for GLSL packHalf2x16 with the two float inputs split into
 * separate registers: dst = (f32to16(y) << 16) | f32to16(x).
 *
 * The instruction order below is load-bearing: y is converted first, the
 * result is shifted into the high word, and then x's conversion fills in
 * the low word without disturbing the high one.
 */
void
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
                                            struct brw_reg dst,
                                            struct brw_reg x,
                                            struct brw_reg y)
{
   assert(brw->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_UD);
   assert(x.type == BRW_REGISTER_TYPE_F);
   assert(y.type == BRW_REGISTER_TYPE_F);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
    *
    *   Because this instruction does not have a 16-bit floating-point type,
    *   the destination data type must be Word (W).
    *
    *   The destination must be DWord-aligned and specify a horizontal stride
    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
    *   each destination channel and the upper word is not modified.
    */
   struct brw_reg dst_w = ud_reg_to_w(dst);

   /* Give each 32-bit channel of dst the form below , where "." means
    * unchanged.
    *   0x....hhhh
    */
   brw_F32TO16(p, dst_w, y);

   /* Now the form:
    *   0xhhhh0000
    */
   brw_SHL(p, dst, dst, brw_imm_ud(16u));

   /* And, finally the form of packHalf2x16's output:
    *   0xhhhhllll
    */
   brw_F32TO16(p, dst_w, x);
}
1093
/**
 * Generate code for one half of GLSL unpackHalf2x16: convert either the low
 * (SPLIT_X) or high (SPLIT_Y) 16-bit half-float of each dword in \p src to
 * a 32-bit float in \p dst.
 */
void
fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
                                              struct brw_reg dst,
                                              struct brw_reg src)
{
   assert(brw->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_F);
   assert(src.type == BRW_REGISTER_TYPE_UD);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
    *
    *   Because this instruction does not have a 16-bit floating-point type,
    *   the source data type must be Word (W). The destination type must be
    *   F (Float).
    */
   struct brw_reg src_w = ud_reg_to_w(src);

   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
    * For the Y case, we wish to access only the upper word; therefore
    * a 16-bit subregister offset is needed.
    */
   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
      src_w.subnr += 2;      /* skip the low word; subnr is in bytes */

   brw_F16TO32(p, dst, src_w);
}
1122
Eric Anholt11dd9e92011-05-24 16:34:27 -07001123void
Eric Anholt5c5218e2013-03-19 15:28:11 -07001124fs_generator::generate_shader_time_add(fs_inst *inst,
1125 struct brw_reg payload,
1126 struct brw_reg offset,
1127 struct brw_reg value)
1128{
Kenneth Graunke53631be2013-07-06 00:36:46 -07001129 assert(brw->gen >= 7);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001130 brw_push_insn_state(p);
1131 brw_set_mask_control(p, true);
1132
1133 assert(payload.file == BRW_GENERAL_REGISTER_FILE);
1134 struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
1135 offset.type);
1136 struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
1137 value.type);
1138
1139 assert(offset.file == BRW_IMMEDIATE_VALUE);
1140 if (value.file == BRW_GENERAL_REGISTER_FILE) {
1141 value.width = BRW_WIDTH_1;
1142 value.hstride = BRW_HORIZONTAL_STRIDE_0;
1143 value.vstride = BRW_VERTICAL_STRIDE_0;
1144 } else {
1145 assert(value.file == BRW_IMMEDIATE_VALUE);
1146 }
1147
1148 /* Trying to deal with setup of the params from the IR is crazy in the FS8
1149 * case, and we don't really care about squeezing every bit of performance
1150 * out of this path, so we just emit the MOVs from here.
1151 */
1152 brw_MOV(p, payload_offset, offset);
1153 brw_MOV(p, payload_value, value);
Eric Anholt3c9dc2d2013-10-02 14:07:40 -07001154 brw_shader_time_add(p, payload,
1155 c->prog_data.base.binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001156 brw_pop_insn_state(p);
Kenneth Graunke6d89bc82013-08-14 19:49:33 -07001157
Eric Anholt3c9dc2d2013-10-02 14:07:40 -07001158 mark_surface_used(c->prog_data.base.binding_table.shader_time_start);
Eric Anholt5c5218e2013-03-19 15:28:11 -07001159}
1160
1161void
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001162fs_generator::generate_code(exec_list *instructions)
Eric Anholt11dd9e92011-05-24 16:34:27 -07001163{
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001164 int last_native_insn_offset = p->next_insn_offset;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001165 const char *last_annotation_string = NULL;
Eric Anholt97615b22012-08-27 14:35:01 -07001166 const void *last_annotation_ir = NULL;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001167
Eric Anholt11dd9e92011-05-24 16:34:27 -07001168 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholt97615b22012-08-27 14:35:01 -07001169 if (shader) {
1170 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
Kenneth Graunkea303df82012-11-20 13:50:52 -08001171 prog->Name, dispatch_width);
Eric Anholt97615b22012-08-27 14:35:01 -07001172 } else {
1173 printf("Native code for fragment program %d (%d-wide dispatch):\n",
Kenneth Graunke1f740022012-11-20 14:41:21 -08001174 fp->Base.Id, dispatch_width);
Eric Anholt97615b22012-08-27 14:35:01 -07001175 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001176 }
1177
Eric Anholt7abfb672012-10-03 13:16:09 -07001178 cfg_t *cfg = NULL;
Eric Anholt080b1252012-04-10 12:01:50 -07001179 if (unlikely(INTEL_DEBUG & DEBUG_WM))
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001180 cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
Eric Anholt080b1252012-04-10 12:01:50 -07001181
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001182 foreach_list(node, instructions) {
Eric Anholt44ffb4a2011-07-29 11:52:39 -07001183 fs_inst *inst = (fs_inst *)node;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001184 struct brw_reg src[3], dst;
1185
1186 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholt080b1252012-04-10 12:01:50 -07001187 foreach_list(node, &cfg->block_list) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001188 bblock_link *link = (bblock_link *)node;
1189 bblock_t *block = link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001190
1191 if (block->start == inst) {
1192 printf(" START B%d", block->block_num);
1193 foreach_list(predecessor_node, &block->parents) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001194 bblock_link *predecessor_link =
1195 (bblock_link *)predecessor_node;
1196 bblock_t *predecessor_block = predecessor_link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001197 printf(" <-B%d", predecessor_block->block_num);
1198 }
1199 printf("\n");
1200 }
1201 }
1202
Eric Anholt11dd9e92011-05-24 16:34:27 -07001203 if (last_annotation_ir != inst->ir) {
1204 last_annotation_ir = inst->ir;
1205 if (last_annotation_ir) {
1206 printf(" ");
Eric Anholt97615b22012-08-27 14:35:01 -07001207 if (shader)
1208 ((ir_instruction *)inst->ir)->print();
1209 else {
1210 const prog_instruction *fpi;
1211 fpi = (const prog_instruction *)inst->ir;
1212 printf("%d: ", (int)(fpi - fp->Base.Instructions));
1213 _mesa_fprint_instruction_opt(stdout,
1214 fpi,
1215 0, PROG_PRINT_DEBUG, NULL);
1216 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001217 printf("\n");
1218 }
1219 }
1220 if (last_annotation_string != inst->annotation) {
1221 last_annotation_string = inst->annotation;
1222 if (last_annotation_string)
1223 printf(" %s\n", last_annotation_string);
1224 }
1225 }
1226
1227 for (unsigned int i = 0; i < 3; i++) {
1228 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
Eric Anholt73b0a282011-10-03 15:12:10 -07001229
1230 /* The accumulator result appears to get used for the
1231 * conditional modifier generation. When negating a UD
1232 * value, there is a 33rd bit generated for the sign in the
1233 * accumulator value, so now you can't check, for example,
1234 * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
1235 */
1236 assert(!inst->conditional_mod ||
1237 inst->src[i].type != BRW_REGISTER_TYPE_UD ||
1238 !inst->src[i].negate);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001239 }
1240 dst = brw_reg_from_fs_reg(&inst->dst);
1241
1242 brw_set_conditionalmod(p, inst->conditional_mod);
Eric Anholt54679fc2012-10-03 13:23:05 -07001243 brw_set_predicate_control(p, inst->predicate);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001244 brw_set_predicate_inverse(p, inst->predicate_inverse);
Eric Anholtb278f652012-12-06 10:36:11 -08001245 brw_set_flag_reg(p, 0, inst->flag_subreg);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001246 brw_set_saturate(p, inst->saturate);
Eric Anholtef2fbf62012-11-28 14:16:03 -08001247 brw_set_mask_control(p, inst->force_writemask_all);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001248
Kenneth Graunkea303df82012-11-20 13:50:52 -08001249 if (inst->force_uncompressed || dispatch_width == 8) {
Eric Anholt11dd9e92011-05-24 16:34:27 -07001250 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1251 } else if (inst->force_sechalf) {
1252 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1253 } else {
1254 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1255 }
1256
1257 switch (inst->opcode) {
1258 case BRW_OPCODE_MOV:
1259 brw_MOV(p, dst, src[0]);
1260 break;
1261 case BRW_OPCODE_ADD:
1262 brw_ADD(p, dst, src[0], src[1]);
1263 break;
1264 case BRW_OPCODE_MUL:
1265 brw_MUL(p, dst, src[0], src[1]);
1266 break;
Eric Anholt3f78f712011-08-15 22:36:18 -07001267 case BRW_OPCODE_MACH:
1268 brw_set_acc_write_control(p, 1);
1269 brw_MACH(p, dst, src[0], src[1]);
1270 brw_set_acc_write_control(p, 0);
1271 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001272
Eric Anholt7d55f372012-02-07 00:59:11 +01001273 case BRW_OPCODE_MAD:
Matt Turner69909c82013-09-19 22:55:24 -07001274 assert(brw->gen >= 6);
Eric Anholt7d55f372012-02-07 00:59:11 +01001275 brw_set_access_mode(p, BRW_ALIGN_16);
Kenneth Graunkea303df82012-11-20 13:50:52 -08001276 if (dispatch_width == 16) {
Eric Anholt7d55f372012-02-07 00:59:11 +01001277 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1278 brw_MAD(p, dst, src[0], src[1], src[2]);
1279 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1280 brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1281 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1282 } else {
1283 brw_MAD(p, dst, src[0], src[1], src[2]);
1284 }
1285 brw_set_access_mode(p, BRW_ALIGN_1);
1286 break;
1287
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001288 case BRW_OPCODE_LRP:
Matt Turner69909c82013-09-19 22:55:24 -07001289 assert(brw->gen >= 6);
Kenneth Graunke0a1d1452012-12-02 00:08:15 -08001290 brw_set_access_mode(p, BRW_ALIGN_16);
1291 if (dispatch_width == 16) {
1292 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1293 brw_LRP(p, dst, src[0], src[1], src[2]);
1294 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1295 brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1296 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1297 } else {
1298 brw_LRP(p, dst, src[0], src[1], src[2]);
1299 }
1300 brw_set_access_mode(p, BRW_ALIGN_1);
1301 break;
1302
Eric Anholt11dd9e92011-05-24 16:34:27 -07001303 case BRW_OPCODE_FRC:
1304 brw_FRC(p, dst, src[0]);
1305 break;
1306 case BRW_OPCODE_RNDD:
1307 brw_RNDD(p, dst, src[0]);
1308 break;
1309 case BRW_OPCODE_RNDE:
1310 brw_RNDE(p, dst, src[0]);
1311 break;
1312 case BRW_OPCODE_RNDZ:
1313 brw_RNDZ(p, dst, src[0]);
1314 break;
1315
1316 case BRW_OPCODE_AND:
1317 brw_AND(p, dst, src[0], src[1]);
1318 break;
1319 case BRW_OPCODE_OR:
1320 brw_OR(p, dst, src[0], src[1]);
1321 break;
1322 case BRW_OPCODE_XOR:
1323 brw_XOR(p, dst, src[0], src[1]);
1324 break;
1325 case BRW_OPCODE_NOT:
1326 brw_NOT(p, dst, src[0]);
1327 break;
1328 case BRW_OPCODE_ASR:
1329 brw_ASR(p, dst, src[0], src[1]);
1330 break;
1331 case BRW_OPCODE_SHR:
1332 brw_SHR(p, dst, src[0], src[1]);
1333 break;
1334 case BRW_OPCODE_SHL:
1335 brw_SHL(p, dst, src[0], src[1]);
1336 break;
Chad Versace20dfa502013-01-09 11:46:42 -08001337 case BRW_OPCODE_F32TO16:
Matt Turner69909c82013-09-19 22:55:24 -07001338 assert(brw->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001339 brw_F32TO16(p, dst, src[0]);
1340 break;
1341 case BRW_OPCODE_F16TO32:
Matt Turner69909c82013-09-19 22:55:24 -07001342 assert(brw->gen >= 7);
Chad Versace20dfa502013-01-09 11:46:42 -08001343 brw_F16TO32(p, dst, src[0]);
1344 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001345 case BRW_OPCODE_CMP:
1346 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1347 break;
1348 case BRW_OPCODE_SEL:
1349 brw_SEL(p, dst, src[0], src[1]);
1350 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001351 case BRW_OPCODE_BFREV:
Matt Turner69909c82013-09-19 22:55:24 -07001352 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001353 /* BFREV only supports UD type for src and dst. */
1354 brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
1355 retype(src[0], BRW_REGISTER_TYPE_UD));
1356 break;
1357 case BRW_OPCODE_FBH:
Matt Turner69909c82013-09-19 22:55:24 -07001358 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001359 /* FBH only supports UD type for dst. */
1360 brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1361 break;
1362 case BRW_OPCODE_FBL:
Matt Turner69909c82013-09-19 22:55:24 -07001363 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001364 /* FBL only supports UD type for dst. */
1365 brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1366 break;
1367 case BRW_OPCODE_CBIT:
Matt Turner69909c82013-09-19 22:55:24 -07001368 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001369 /* CBIT only supports UD type for dst. */
1370 brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
1371 break;
Matt Turner014cce32013-09-19 13:01:08 -07001372 case BRW_OPCODE_ADDC:
1373 assert(brw->gen >= 7);
1374 brw_set_acc_write_control(p, 1);
1375 brw_ADDC(p, dst, src[0], src[1]);
1376 brw_set_acc_write_control(p, 0);
1377 break;
1378 case BRW_OPCODE_SUBB:
1379 assert(brw->gen >= 7);
1380 brw_set_acc_write_control(p, 1);
1381 brw_SUBB(p, dst, src[0], src[1]);
1382 brw_set_acc_write_control(p, 0);
1383 break;
Matt Turner1f0f26d2013-04-09 19:22:34 -07001384
1385 case BRW_OPCODE_BFE:
Matt Turner69909c82013-09-19 22:55:24 -07001386 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001387 brw_set_access_mode(p, BRW_ALIGN_16);
1388 if (dispatch_width == 16) {
1389 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1390 brw_BFE(p, dst, src[0], src[1], src[2]);
1391 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1392 brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1393 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1394 } else {
1395 brw_BFE(p, dst, src[0], src[1], src[2]);
1396 }
1397 brw_set_access_mode(p, BRW_ALIGN_1);
1398 break;
1399
1400 case BRW_OPCODE_BFI1:
Matt Turner69909c82013-09-19 22:55:24 -07001401 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001402 brw_BFI1(p, dst, src[0], src[1]);
1403 break;
1404 case BRW_OPCODE_BFI2:
Matt Turner69909c82013-09-19 22:55:24 -07001405 assert(brw->gen >= 7);
Matt Turner1f0f26d2013-04-09 19:22:34 -07001406 brw_set_access_mode(p, BRW_ALIGN_16);
1407 if (dispatch_width == 16) {
1408 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1409 brw_BFI2(p, dst, src[0], src[1], src[2]);
1410 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1411 brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
1412 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1413 } else {
1414 brw_BFI2(p, dst, src[0], src[1], src[2]);
1415 }
1416 brw_set_access_mode(p, BRW_ALIGN_1);
1417 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001418
1419 case BRW_OPCODE_IF:
1420 if (inst->src[0].file != BAD_FILE) {
1421 /* The instruction has an embedded compare (only allowed on gen6) */
Kenneth Graunke53631be2013-07-06 00:36:46 -07001422 assert(brw->gen == 6);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001423 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
1424 } else {
Kenneth Graunkea303df82012-11-20 13:50:52 -08001425 brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001426 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001427 break;
1428
1429 case BRW_OPCODE_ELSE:
1430 brw_ELSE(p);
1431 break;
1432 case BRW_OPCODE_ENDIF:
1433 brw_ENDIF(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001434 break;
1435
1436 case BRW_OPCODE_DO:
Eric Anholtce6be332011-12-06 12:30:03 -08001437 brw_DO(p, BRW_EXECUTE_8);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001438 break;
1439
1440 case BRW_OPCODE_BREAK:
Eric Anholtf1d89632011-12-06 12:44:41 -08001441 brw_BREAK(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001442 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1443 break;
1444 case BRW_OPCODE_CONTINUE:
1445 /* FINISHME: We need to write the loop instruction support still. */
Kenneth Graunke53631be2013-07-06 00:36:46 -07001446 if (brw->gen >= 6)
Eric Anholt9f881472011-12-06 12:09:58 -08001447 gen6_CONT(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001448 else
Eric Anholtf1d89632011-12-06 12:44:41 -08001449 brw_CONT(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001450 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1451 break;
1452
Eric Anholtce6be332011-12-06 12:30:03 -08001453 case BRW_OPCODE_WHILE:
Eric Anholtce6be332011-12-06 12:30:03 -08001454 brw_WHILE(p);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001455 break;
1456
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001457 case SHADER_OPCODE_RCP:
1458 case SHADER_OPCODE_RSQ:
1459 case SHADER_OPCODE_SQRT:
1460 case SHADER_OPCODE_EXP2:
1461 case SHADER_OPCODE_LOG2:
Eric Anholt65b5cbb2011-08-05 12:38:58 -07001462 case SHADER_OPCODE_SIN:
1463 case SHADER_OPCODE_COS:
Kenneth Graunke53631be2013-07-06 00:36:46 -07001464 if (brw->gen >= 7) {
Kenneth Graunkea73c65c2011-10-18 12:24:47 -07001465 generate_math1_gen7(inst, dst, src[0]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001466 } else if (brw->gen == 6) {
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001467 generate_math1_gen6(inst, dst, src[0]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001468 } else if (brw->gen == 5 || brw->is_g4x) {
Kenneth Graunke1b77d212013-03-30 00:15:54 -07001469 generate_math_g45(inst, dst, src[0]);
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001470 } else {
1471 generate_math_gen4(inst, dst, src[0]);
1472 }
1473 break;
Kenneth Graunkeff8f2722011-09-28 17:37:54 -07001474 case SHADER_OPCODE_INT_QUOTIENT:
1475 case SHADER_OPCODE_INT_REMAINDER:
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001476 case SHADER_OPCODE_POW:
Kenneth Graunke53631be2013-07-06 00:36:46 -07001477 if (brw->gen >= 7) {
Kenneth Graunkea73c65c2011-10-18 12:24:47 -07001478 generate_math2_gen7(inst, dst, src[0], src[1]);
Kenneth Graunke53631be2013-07-06 00:36:46 -07001479 } else if (brw->gen == 6) {
Kenneth Graunke74e927b2011-08-18 11:55:42 -07001480 generate_math2_gen6(inst, dst, src[0], src[1]);
1481 } else {
1482 generate_math_gen4(inst, dst, src[0]);
1483 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001484 break;
1485 case FS_OPCODE_PIXEL_X:
1486 generate_pixel_xy(dst, true);
1487 break;
1488 case FS_OPCODE_PIXEL_Y:
1489 generate_pixel_xy(dst, false);
1490 break;
1491 case FS_OPCODE_CINTERP:
1492 brw_MOV(p, dst, src[0]);
1493 break;
1494 case FS_OPCODE_LINTERP:
1495 generate_linterp(inst, dst, src);
1496 break;
Kenneth Graunkefebad172011-10-26 12:58:37 -07001497 case SHADER_OPCODE_TEX:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001498 case FS_OPCODE_TXB:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001499 case SHADER_OPCODE_TXD:
1500 case SHADER_OPCODE_TXF:
Chris Forbesf52ce6a2013-01-24 21:35:15 +13001501 case SHADER_OPCODE_TXF_MS:
Kenneth Graunkefebad172011-10-26 12:58:37 -07001502 case SHADER_OPCODE_TXL:
1503 case SHADER_OPCODE_TXS:
Matt Turnerb8aa9f72013-03-06 14:47:01 -08001504 case SHADER_OPCODE_LOD:
Chris Forbesfb455502013-03-31 21:31:12 +13001505 case SHADER_OPCODE_TG4:
Eric Anholt11dd9e92011-05-24 16:34:27 -07001506 generate_tex(inst, dst, src[0]);
1507 break;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001508 case FS_OPCODE_DDX:
1509 generate_ddx(inst, dst, src[0]);
1510 break;
1511 case FS_OPCODE_DDY:
Paul Berryd08fdac2012-06-20 13:40:45 -07001512 /* Make sure fp->UsesDFdy flag got set (otherwise there's no
1513 * guarantee that c->key.render_to_fbo is set).
1514 */
1515 assert(fp->UsesDFdy);
Paul Berry82d25962012-06-20 13:40:45 -07001516 generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001517 break;
1518
1519 case FS_OPCODE_SPILL:
1520 generate_spill(inst, src[0]);
1521 break;
1522
1523 case FS_OPCODE_UNSPILL:
1524 generate_unspill(inst, dst);
1525 break;
1526
Eric Anholt29340d02012-11-07 10:42:34 -08001527 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
1528 generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001529 break;
1530
Eric Anholt461a2972012-12-05 00:06:30 -08001531 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
1532 generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1533 break;
1534
Eric Anholtd8214e42012-11-07 11:18:34 -08001535 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
Eric Anholt70b27e02013-03-18 10:16:42 -07001536 generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
Eric Anholtd8214e42012-11-07 11:18:34 -08001537 break;
1538
1539 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
1540 generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
1541 break;
1542
Eric Anholt11dd9e92011-05-24 16:34:27 -07001543 case FS_OPCODE_FB_WRITE:
1544 generate_fb_write(inst);
1545 break;
Paul Berry3f929ef2012-06-18 14:50:04 -07001546
1547 case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
Eric Anholtb278f652012-12-06 10:36:11 -08001548 generate_mov_dispatch_to_flags(inst);
Paul Berry3f929ef2012-06-18 14:50:04 -07001549 break;
1550
Eric Anholtbeafced2012-12-06 10:15:08 -08001551 case FS_OPCODE_DISCARD_JUMP:
1552 generate_discard_jump(inst);
1553 break;
1554
Eric Anholt71f06342012-11-27 14:10:52 -08001555 case SHADER_OPCODE_SHADER_TIME_ADD:
Eric Anholt5c5218e2013-03-19 15:28:11 -07001556 generate_shader_time_add(inst, src[0], src[1], src[2]);
Eric Anholt71f06342012-11-27 14:10:52 -08001557 break;
1558
Eric Anholt4c1fdae2013-03-06 14:47:22 -08001559 case FS_OPCODE_SET_SIMD4X2_OFFSET:
1560 generate_set_simd4x2_offset(inst, dst, src[0]);
Eric Anholt461a2972012-12-05 00:06:30 -08001561 break;
1562
Chad Versace20dfa502013-01-09 11:46:42 -08001563 case FS_OPCODE_PACK_HALF_2x16_SPLIT:
1564 generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
1565 break;
1566
1567 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
1568 case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
1569 generate_unpack_half_2x16_split(inst, dst, src[0]);
1570 break;
1571
Kenneth Graunke57a50252013-03-27 23:19:39 -07001572 case FS_OPCODE_PLACEHOLDER_HALT:
1573 /* This is the place where the final HALT needs to be inserted if
1574 * we've emitted any discards. If not, this will emit no code.
1575 */
1576 patch_discard_jumps_to_fb_writes();
1577 break;
1578
Eric Anholt11dd9e92011-05-24 16:34:27 -07001579 default:
Kenneth Graunkeb02492f2012-11-14 14:24:31 -08001580 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
Eric Anholt11dd9e92011-05-24 16:34:27 -07001581 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
Kenneth Graunkeb02492f2012-11-14 14:24:31 -08001582 opcode_descs[inst->opcode].name);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001583 } else {
1584 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
1585 }
Kenneth Graunkedd1fd302012-11-20 17:02:23 -08001586 abort();
Eric Anholt11dd9e92011-05-24 16:34:27 -07001587 }
1588
1589 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001590 brw_dump_compile(p, stdout,
1591 last_native_insn_offset, p->next_insn_offset);
Eric Anholt080b1252012-04-10 12:01:50 -07001592
1593 foreach_list(node, &cfg->block_list) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001594 bblock_link *link = (bblock_link *)node;
1595 bblock_t *block = link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001596
1597 if (block->end == inst) {
1598 printf(" END B%d", block->block_num);
1599 foreach_list(successor_node, &block->children) {
Eric Anholt7abfb672012-10-03 13:16:09 -07001600 bblock_link *successor_link =
1601 (bblock_link *)successor_node;
1602 bblock_t *successor_block = successor_link->block;
Eric Anholt080b1252012-04-10 12:01:50 -07001603 printf(" ->B%d", successor_block->block_num);
1604 }
1605 printf("\n");
1606 }
1607 }
Eric Anholt11dd9e92011-05-24 16:34:27 -07001608 }
1609
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001610 last_native_insn_offset = p->next_insn_offset;
Eric Anholt11dd9e92011-05-24 16:34:27 -07001611 }
1612
1613 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1614 printf("\n");
1615 }
1616
Eric Anholt11dd9e92011-05-24 16:34:27 -07001617 brw_set_uip_jip(p);
1618
1619 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
1620 * emit issues, it doesn't get the jump distances into the output,
1621 * which is often something we want to debug. So this is here in
1622 * case you're doing that.
1623 */
1624 if (0) {
Eric Anholtf2bd3e72012-02-03 11:50:42 +01001625 brw_dump_compile(p, stdout, 0, p->next_insn_offset);
Eric Anholt11dd9e92011-05-24 16:34:27 -07001626 }
1627}
Kenneth Graunkeea681a02012-11-09 01:05:47 -08001628
1629const unsigned *
1630fs_generator::generate_assembly(exec_list *simd8_instructions,
1631 exec_list *simd16_instructions,
1632 unsigned *assembly_size)
1633{
1634 dispatch_width = 8;
1635 generate_code(simd8_instructions);
1636
1637 if (simd16_instructions) {
1638 /* We have to do a compaction pass now, or the one at the end of
1639 * execution will squash down where our prog_offset start needs
1640 * to be.
1641 */
1642 brw_compact_instructions(p);
1643
1644 /* align to 64 byte boundary. */
1645 while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) {
1646 brw_NOP(p);
1647 }
1648
1649 /* Save off the start of this 16-wide program */
1650 c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
1651
1652 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1653
1654 dispatch_width = 16;
1655 generate_code(simd16_instructions);
1656 }
1657
1658 return brw_get_program(p, assembly_size);
1659}